diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..b420d9c31 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,14 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + # Enable version updates for npm + - package-ecosystem: "npm" + # Look for `package.json` and `lock` files in the `root` directory + directory: "/" + # Check the npm registry for updates every day (weekdays) + schedule: + interval: "daily" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 47b1a2040..8fb063350 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,8 +26,28 @@ jobs: - name: Lint run: npm run lint - - name: Verify - run: npm run verify + - name: Short tests (no bench) + run: npm run test-all-no-bench - - name: Fixture smoke - run: npm run fixture-smoke + windows: + runs-on: windows-latest + env: + PAIROFCLEATS_EMBEDDINGS: stub + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Node + uses: actions/setup-node@v4 + with: + node-version: '18' + cache: npm + + - name: Install deps + run: npm ci + + - name: Windows regression lane + run: | + node tests/worker-pool-windows.js + node tests/search-windows-path-filter.js + node tests/fixture-parity.js --fixtures sample diff --git a/.gitignore b/.gitignore index 7a18797b7..86d38b692 100644 --- a/.gitignore +++ b/.gitignore @@ -4,8 +4,37 @@ index-code/ index-prose/ ci-artifacts/ tests/.cache/ +tests/.logs/ +benchmarks/repos/ +benchmarks/cache/ +benchmarks/results/ docs/benchmarks.json docs/phase3-parity-report.json *.db *.db-shm *.db-wal +__pycache__/ +*.py[cod] +*.pyo +*$py.class +.Python +.pytest_cache/ +.mypy_cache/ +.ruff_cache/ +.pytype/ +.coverage +coverage.xml +htmlcov/ +.tox/ +.nox/ +.venv/ +venv/ +ENV/ +env/ +env.bak/ +venv.bak/ +*.egg +*.egg-info/ +.eggs/ +pip-wheel-metadata/ +.pairofcleats/ diff --git a/.pairofcleats.json b/.pairofcleats.json index 9e8dec64c..68af92315 100644 --- a/.pairofcleats.json +++ b/.pairofcleats.json @@ -1,101 +1,109 @@ { - "dictionary": { - "languages": [ - "en" - ], - "includeSlang": true, - "enableRepoDictionary": false, - "dir": "", - "files": [], - "slangDirs": [], - "slangFiles": [] - }, - "cache": { - "root": "" - }, + // Enable sqlite index artifacts for search backends. + // Speed impact: adds sqlite build time when stage4 runs. "sqlite": { - "use": true, - "dbDir": "", - "annMode": "extension", - "compactOnIncremental": false, - "vectorExtension": { - "provider": "sqlite-vec", - "dir": "", - "path": "", - "table": "dense_vectors_ann", - "column": "embedding", - "encoding": "float32", - "options": "" - } + // Toggle sqlite index usage/artifact generation. + // Speed impact: enabling adds some indexing time and disk usage. + "use": true }, + // Search defaults for query-time behavior. + // Speed impact: no direct impact on indexing speed. "search": { + // Prefer ANN search by default when multiple backends exist. + // Speed impact: no impact on indexing; affects query latency/recall. 
"annDefault": true, - "sqliteFtsNormalize": false, - "queryCache": { - "enabled": false, - "maxEntries": 200, - "ttlMs": 0 - } - }, - "triage": { - "recordsDir": "", - "storeRawPayload": false, - "promoteFields": [ - "recordType", - "source", - "recordId", - "service", - "env", - "team", - "owner", - "vulnId", - "cve", - "packageName", - "packageEcosystem", - "severity", - "status", - "assetId" - ], - "contextPack": { - "maxHistory": 5, - "maxEvidencePerQuery": 5 - } + // Dense vector combination strategy for search. + // Speed impact: minor impact on embedding/storage cost during indexing. + "denseVectorMode": "merged" }, + // Index build pipeline options. + // Speed impact: many flags here change CPU/IO per file. "indexing": { - "concurrency": 4, - "importConcurrency": 4, - "astDataflow": true, - "controlFlow": true, - "riskAnalysis": true, - "riskAnalysisCrossFile": true, - "typeInference": false, - "typeInferenceCrossFile": false, + "workerPool": { + "enabled": true, + "maxWorkers": 8 + }, + // Sparse postings generation settings. + // Speed impact: heavier postings settings increase indexing time/size. "postings": { + // Build phrase n-gram postings. + // Speed impact: increases indexing time and index size. "enablePhraseNgrams": true, + // Smallest phrase n-gram length. + // Speed impact: lower values add more n-grams and cost. "phraseMinN": 2, + // Largest phrase n-gram length. + // Speed impact: higher values increase indexing time and size. "phraseMaxN": 4, + // Build chargram postings for fuzzy matching. + // Speed impact: noticeable extra CPU and disk usage. "enableChargrams": true, + // Smallest chargram length. + // Speed impact: lower values increase chargram volume and cost. "chargramMinN": 3, - "chargramMaxN": 5 - } - }, - "sql": { - "dialect": "", - "dialectByExt": { - ".psql": "postgres", - ".pgsql": "postgres", - ".mysql": "mysql", - ".sqlite": "sqlite" + // Largest chargram length. + // Speed impact: higher values increase chargram volume and cost. + "chargramMaxN": 5, + // Choose which fields contribute chargrams. + // Speed impact: more fields increase indexing work. + "chargramSource": "fields", + // Cap token length eligible for chargrams. + // Speed impact: higher caps increase CPU on long identifiers. + "chargramMaxTokenLength": 48, + // Track postings per field (name, path, body, etc). + // Speed impact: slight overhead for richer scoring. + "fielded": true + }, + // When to scan imports ("pre" or "post" indexing). + // Speed impact: small; "post" avoids extra upfront work. + "importScan": "post", + // Enable AST dataflow analysis. + // Speed impact: moderate CPU cost on large codebases. + "astDataflow": true, + // Enable control-flow analysis. + // Speed impact: moderate CPU cost on large codebases. + "controlFlow": true, + // Enable risk analysis rules. + // Speed impact: moderate CPU cost; can be heavy on huge repos. + "riskAnalysis": true, + // Enable cross-file risk correlation. + // Speed impact: heavy extra work on large repos. + "riskAnalysisCrossFile": true, + // Enable type inference. + // Speed impact: moderate to heavy CPU cost. + "typeInference": true, + // Enable cross-file type inference. + // Speed impact: heavy extra work on large repos. + "typeInferenceCrossFile": true, + // Collect git blame/churn metadata per file. + // Speed impact: heavy IO/CPU; can dominate indexing time. + "gitBlame": false, + // Run linting pass for diagnostics. + // Speed impact: extra CPU per file. + "lint": false, + // Compute complexity metrics. 
+ // Speed impact: extra CPU per file. + "complexity": true, + // Python AST parsing options. + // Speed impact: small to moderate CPU on Python files. + "pythonAst": { + // Enable Python AST parsing. + // Speed impact: small to moderate on Python-heavy repos. + "enabled": true + }, + // Tree-sitter parsing options. + // Speed impact: moderate CPU, improved chunking accuracy. + "treeSitter": { + // Enable tree-sitter parsing. + // Speed impact: moderate CPU on supported languages. + "enabled": true } }, - "models": { - "id": "Xenova/all-MiniLM-L12-v2", - "dir": "" - }, - "tooling": { - "autoInstallOnDetect": false, - "installScope": "cache", - "allowGlobalFallback": true, - "dir": "" + // Runtime process limits for the indexer. + // Speed impact: higher heap reduces GC stalls on big repos. + "runtime": { + // Max Node heap size in MB for the indexer process. + // Speed impact: too low slows indexing; higher reduces GC overhead. + "maxOldSpaceMb": 98048 } } diff --git a/.rgignore b/.rgignore new file mode 100644 index 000000000..430ec74d9 --- /dev/null +++ b/.rgignore @@ -0,0 +1 @@ +benchmarks/repos/ diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..b97b954f9 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,16 @@ +# Changelog + +All notable changes to PairOfCleats are documented in this file. + +## Unreleased +### Breaking +- None. + +### Added +- None. + +### Fixed +- None. + +## v0.2.0 - 2026-01-11 +- Initial internal release. diff --git a/COMPLETED_PHASES.md b/COMPLETED_PHASES.md new file mode 100644 index 000000000..2452f7915 --- /dev/null +++ b/COMPLETED_PHASES.md @@ -0,0 +1,685 @@ +# Completed Phases + +Phases 1-4 were completed during the initial Sublime Text plugin and map rollout. Phases 11-12 and 14-15 were completed as the cache/perf and optional-deps groundwork. + +## Phase 1 — Sublime Text 3 Plugin Foundation (Parity + Plumbing) + +### 1.1 Plugin repo structure + packaging + +* [x] Create `sublime/PairOfCleats/` package skeleton: + + * [x] `PairOfCleats.py` (entrypoint) + * [x] `commands/` (command modules) + * [x] `lib/` (helpers: config, subprocess, parsing, caching) + * [x] `messages/` (install/upgrade notes) + * [x] `Default.sublime-commands` + * [x] `Main.sublime-menu` (optional) + * [x] `Default.sublime-keymap` (optional) +* [x] Add `README.md` for ST3 plugin installation + prerequisites +* [x] Add “Package Control” compatibility notes (no external deps beyond Node runtime + repo binaries) + +### 1.2 Node/CLI discovery + execution contract + +* [x] Implement robust “pairofcleats binary discovery”: + + * [x] Prefer project-local `node_modules/.bin/pairofcleats` when available + * [x] Fallback to global `pairofcleats` on PATH + * [x] Allow explicit override in ST settings: `pairofcleats_path` +* [x] Implement repo-root detection: + + * [x] Prefer `.pairofcleats.json` location + * [x] Fallback to `.git` root + * [x] Fallback to folder of active file +* [x] Implement subprocess wrapper: + + * [x] Streams output to Sublime panel + * [x] Captures JSON payloads when `--json` is used + * [x] Supports cancellation (best-effort) + * [x] Adds stable environment injection (cache root, embeddings mode, etc.) 
+ +### 1.3 Settings + per-project overrides + +* [x] Add `PairOfCleats.sublime-settings` defaults: + + * [x] `pairofcleats_path`, `node_path` + * [x] `index_mode_default` (code/prose/both) + * [x] `search_backend_default` (memory/sqlite-fts/etc) + * [x] `open_results_in` (quick_panel / new_tab / output_panel) +* [x] Support `.sublime-project` settings overrides +* [x] Validate config and surface actionable error messages + +### 1.4 Smoke tests (plugin-side) + +* [x] Add Python unit tests that: + + * [x] Import plugin modules without Sublime runtime (mock `sublime`, `sublime_plugin`) + * [x] Validate binary discovery behavior + * [x] Validate repo-root resolution on fixtures + * [x] Validate settings overlay precedence + +--- + + +## Phase 2 — Sublime Search UX (Queries, Results, Navigation) + +### 2.1 Search command(s) + +* [x] `PairOfCleats: Search` command: + + * [x] Prompt input panel for query + * [x] Optional toggles: code/prose/both, backend, limit + * [x] Execute `pairofcleats search ... --json` +* [x] `PairOfCleats: Search Selection` command: + + * [x] Uses selected text as query +* [x] `PairOfCleats: Search Symbol Under Cursor` command + +### 2.2 Results presentation + +* [x] Quick panel results: + + * [x] Show `file:line-range`, symbol name, snippet/headline, score + * [x] Preserve stable ordering for repeatability +* [x] On selection: + + * [x] Open file at best-effort location (line/column) + * [x] Highlight match range (if available) +* [x] Add optional “results buffer” view (for large result sets) + +### 2.3 Quality-of-life UX + +* [x] Query history (per project) +* [x] “Repeat last search” command +* [x] “Explain search” (if supported by CLI flags / internal explain output) + +### 2.4 Tests + +* [x] Add Node-level “search contract” tests: + + * [x] Ensure `--json` output parseability and required fields +* [x] Add plugin tests: + + * [x] Search command dispatches correct subprocess args + * [x] Results parsing tolerates partial/missing optional fields + +--- + + +## Phase 3 — Index Lifecycle in Sublime (Build/Watch/Validate + Status) + +### 3.1 Build index commands + +* [x] `PairOfCleats: Index Build (Code)` +* [x] `PairOfCleats: Index Build (Prose)` +* [x] `PairOfCleats: Index Build (All)` +* [x] Stream progress to an output panel +* [x] Persist “last index time” + “last index mode” in project cache + +### 3.2 Watch mode integration + +* [x] `PairOfCleats: Index Watch Start` +* [x] `PairOfCleats: Index Watch Stop` +* [x] Prevent duplicate watchers per window/project +* [x] Robust shutdown on Sublime exit / project close + +### 3.3 Validate + repair affordances + +* [x] `PairOfCleats: Index Validate` +* [x] Surface actionable failures (missing artifacts, invalid JSON, stale manifests) +* [x] Provide “Open index directory” convenience command + +### 3.4 Tests + +* [x] Node tests for index build/validate on fixtures +* [x] Plugin tests for lifecycle commands and watcher gating + +--- + + +## Phase 4 — Codebase Semantic Map (Imports/Exports/Calls/Dataflow/Control Flow → Visual Map) + +### What this phase delivers + +A **real codebase map** that uses existing and enriched semantic metadata to generate a **diagram-ready model** and one or more **rendered artifacts**. 
It must explicitly incorporate and visualize: + +* **Imports / Exports / ImportLinks** +* **Calls / CallLinks / CallSummaries** +* **Usages / UsageLinks** +* **Signature / Modifiers / Params / Returns** +* **Reads / Writes / Mutates / Aliases** +* **Control flow** (branches, loops, throws, awaits, yields, returns) +* **AST-derived semantics** (using what the indexer already extracts) + +#### Visual grammar (required characteristics) + +* **File = outer shape** + + * Shape varies by file type/category (source/test/config/doc/generated/etc.) +* **Functions/classes = content inside the file shape** + + * The “fill” of the file node is structurally subdivided to represent contained functions/classes +* **Function details = nested sub-shapes inside function area** + + * Small badges/segments represent modifiers/returns/dataflow/control-flow +* **Multiple line styles = multiple edge semantics** + + * Imports (file→file), control flow calls (fn→fn), usage deps (fn→fn), dataflow (arg/return/state) + +--- + +### 4.1 Inventory + normalize available semantics from existing artifacts + +Leverage what is already produced today, and formalize how it’s consumed: + +* [x] **Inputs** (expected present after `index build`): + + * [x] `file_relations.json` (imports, exports, usages, importLinks, functionMeta/classMeta) + * [x] `repo_map.json` (chunk-level symbol map, exported flag, signatures) + * [x] `chunk_meta.json` (docmeta/metaV2: signature/modifiers/returns/controlFlow/dataflow + relations) + * [x] `graph_relations.json` (importGraph/callGraph/usageGraph) +* [x] Define “canonical IDs” used across the map: + + * [x] `fileId = <relPath>` + * [x] `symbolId = <relPath>::<symbolName>` (already used in relation graphs) + * [x] Stable IDs for anonymous/lambda cases (fallback: chunkId when name is `(anonymous)`) + +--- + +### 4.2 Define a versioned “Map Model” schema (diagram-ready) + +This is the core contract the plugin will consume. + +* [x] Create `docs/map-schema.json` (or similar) with: + + * [x] `version` + * [x] `generatedAt` + * [x] `root` (repo root logical id) + * [x] `legend`: + + * [x] `nodeTypes` (file/function/class/symbol) + * [x] `fileShapes` mapping (category → shape) + * [x] `functionBadges` mapping (modifier/returns/dataflow/control-flow → badge glyph) + * [x] `edgeTypes` mapping (imports/calls/usages/dataflow/aliases/mutations) + * [x] `edgeStyles` mapping (solid/dashed/dotted/double, arrowheads, labels) + * [x] `nodes`: + + * [x] file nodes with nested “members” (functions/classes) + * [x] function nodes with structured “semantic facets” + * [x] `edges` (typed, labeled, optionally “port-addressable”) +* [x] Schema must support **hierarchical nesting**: + + * [x] File node has `members[]` with per-member ports + * [x] Member nodes (functions) include `signature`, `modifiers`, `returns`, `controlFlow`, `dataflow` +* [x] Determinism requirements: + + * [x] Stable ordering (sort keys/ids) + * [x] Explicit timestamp field allowed, but everything else must be deterministic + +--- + +### 4.3 Build the semantic “map extractor” (core engine tool) + +Implement a Node tool that reads index artifacts and produces the map model.
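For orientation before the checklist below, a minimal sketch of the extractor’s overall shape — the artifact filenames are the 4.1 inputs, while `buildMapModel`, `loadArtifact`, and everything inside them are hypothetical names, not the shipped implementation:

```js
// Sketch only: load the 4.1 artifacts and merge them into a diagram-ready model.
import fs from 'node:fs/promises';
import path from 'node:path';

async function loadArtifact(indexDir, name) {
  return JSON.parse(await fs.readFile(path.join(indexDir, name), 'utf8'));
}

export async function buildMapModel(indexDir) {
  const [fileRelations, repoMap, graphRelations] = await Promise.all([
    loadArtifact(indexDir, 'file_relations.json'),
    loadArtifact(indexDir, 'repo_map.json'),
    loadArtifact(indexDir, 'graph_relations.json'),
  ]);
  const nodes = []; // file nodes, each carrying nested members[]
  const edges = []; // typed edges: imports/calls/usages/dataflow/aliases
  // ...derive file/member nodes from repoMap + fileRelations,
  //    and typed edges from graphRelations...
  // Determinism: sort by stable ids so repeated runs emit identical output.
  nodes.sort((a, b) => a.id.localeCompare(b.id));
  edges.sort((a, b) => `${a.from}>${a.to}`.localeCompare(`${b.from}>${b.to}`));
  return { version: 1, generatedAt: new Date().toISOString(), nodes, edges };
}
```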
+ +* [x] Add `tools/code-map.js` (or `tools/report-code-map.js`) that: + + * [x] Locates repo + index dirs using existing `tools/dict-utils.js` + * [x] Loads: + + * [x] `file_relations.json` + * [x] `repo_map.json` + * [x] `chunk_meta.json` (or minimal subset) + * [x] `graph_relations.json` + * [x] Merges into a single “map model”: + + * [x] **Files** classified into categories (drives file shape) + * [x] **Members** extracted per file: + + * [x] functions/methods/classes (from `repo_map` and/or chunk meta) + * [x] include line ranges + * [x] include `signature`, `modifiers`, `params`, `returns` + * [x] **Function semantics**: + + * [x] `dataflow.reads`, `dataflow.writes`, `dataflow.mutations`, `dataflow.aliases` + * [x] `controlFlow.branches/loops/returns/throws/awaits/yields/breaks/continues` + * [x] `throws`, `awaits`, `yields`, `returnsValue` facets surfaced explicitly + * [x] **Edges**: + + * [x] Import edges (file→file) from `importLinks` + raw `imports` + * [x] Export edges (file→symbol) from `exports` + repo_map `exported` + * [x] Call edges (fn→fn) from `callLinks` or `graph_relations.callGraph` + * [x] Usage edges (fn→fn) from `usageLinks` or `graph_relations.usageGraph` + * [x] Dataflow edges: + + * [x] Argument flow edges from `callSummaries.argMap` (caller→callee param ports) + * [x] Return flow edges using inferred return metadata where available + * [x] Optional: “state flow” edges when reads/writes/mutations overlap (guardrailed; see 4.6) + * [x] Alias edges: + + * [x] derived from `dataflow.aliases` (function-local or cross-function via calls when resolvable) +* [x] Add CLI entrypoint: + + * [x] `pairofcleats report map` (preferred, consistent with existing `report` group), or + * [x] `pairofcleats map` (top-level) +* [x] Support scope + size controls: + + * [x] `--scope repo|dir|file|symbol` + * [x] `--focus <id>` + * [x] `--include imports,calls,usages,dataflow,exports` + * [x] `--only-exported` + * [x] `--max-files N`, `--max-members-per-file N`, `--max-edges N` + * [x] `--collapse file|dir` (aggregate mode) + * [x] `--format json|dot|svg|html` (see 4.4) + +--- + +### 4.4 Generate “shape-based” diagrams (DOT-first, with nested function fills) + +To match your “shape with fill containing functions” requirement cleanly, DOT/Graphviz is the most direct representation.
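To make the encoding concrete, a minimal sketch of how one file node could be written — `fileNodeToDot`, the `file`/`member` fields, and the port names are hypothetical, and HTML escaping is omitted for brevity:

```js
// Sketch: a file rendered as a Graphviz HTML-like label, one row (with a PORT)
// per member so edges can target individual functions rather than whole files.
function fileNodeToDot(file) {
  const rows = file.members
    .map((m) => `<TR><TD PORT="${m.port}">${m.name}</TD></TR>`)
    .join('');
  return `"${file.id}" [shape=plaintext, label=<
    <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="2">
      <TR><TD><B>${file.relPath}</B></TD></TR>${rows}
    </TABLE>>];`;
}

// Edges then land on member ports, with line style carrying the semantics:
//   "a.js" -> "b.js"         [style=dashed]; // import (file -> file)
//   "a.js":f1 -> "b.js":f2   [style=solid];  // call (fn -> fn)
//   "a.js":f1 -> "b.js":f2   [style=dotted]; // argument dataflow
```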
+ +* [x] Implement a DOT generator `src/map/dot-writer.js`: + + * [x] **File nodes as outer shapes** with file-type-dependent shapes: + + * [x] Source code: `box` or `component` + * [x] Tests: `box` with distinct border style + * [x] Config/data: `cylinder` or `hexagon` + * [x] Docs/prose: `note` + * [x] Generated/build artifacts: `folder` or `box3d` + * [x] **Fill represents members** using HTML-like labels: + + * [x] Outer `<table>` represents the file “container” + * [x] Each function/class is a row with a `PORT` so edges can land on that member specifically + * [x] **Nested shapes inside the function row** (HTML sub-tables/cells) to represent: + + * [x] modifiers: async/static/generator/visibility + * [x] signature/params summary + * [x] returns/returnType/returnsValue indicator + * [x] dataflow mini-badges: reads/writes/mutates/aliases counts (and/or top N symbols) + * [x] controlFlow mini-badges: branches/loops/throws/awaits/yields +* [x] **Edge encoding** (multiple edge “line types”): + + * [x] Import edges: dashed file→file + * [x] Call edges: solid function→function (primary control flow) + * [x] Usage edges: thin/secondary style function→function + * [x] Dataflow edges: + + * [x] dotted caller→callee(param) edges (argument flow) + * [x] dotted callee→caller edges for return flow (if inferred) + * [x] Mutation/state edges (optional, guardrailed): double-line or distinct style + * [x] Alias edges: dashed-dotted, labeled `alias: a=b` +* [x] Output modes: + + * [x] `--format dot` always available + * [x] `--format svg` if Graphviz present (shell out to `dot -Tsvg`) + * [x] `--format html` wraps SVG + legend into a standalone HTML viewer +* [x] Implement legend rendering: + + * [x] Either embed as a DOT subgraph or in HTML wrapper + * [x] Must document shape/edge meaning for users + +--- + +### 4.5 Sublime Text 3 plugin commands for map generation + viewing + +Provide first-class UX inside Sublime, even if rendering happens externally. + +* [x] Add commands: + + * [x] `PairOfCleats: Map (Repo)` + * [x] `PairOfCleats: Map (Current Folder)` + * [x] `PairOfCleats: Map (Current File)` + * [x] `PairOfCleats: Map (Symbol Under Cursor)` + * [x] `PairOfCleats: Map (Selection)` +* [x] Add a “Map Type” chooser: + + * [x] Import Map + * [x] Call Map + * [x] Usage/Dependency Map + * [x] Dataflow Map (args/returns/state) + * [x] Combined Map (guardrailed by size limits) +* [x] Implement output handling: + + * [x] Write outputs to `.pairofcleats/maps/` (repo-local) or cache dir + * [x] Open `.dot` in Sublime for inspection + * [x] If `.svg`/`.html` produced: + + * [x] Provide “Open in Browser” command (best-effort) +* [x] Navigation affordances: + + * [x] When a map is generated, also produce an indexable “node list” JSON: + + * [x] allows Sublime quick panel “Jump to node” (file/function) + * [x] opens file at recorded `startLine` +* [x] Graceful degradation: + + * [x] If `astDataflow` / `controlFlow` metadata is unavailable in the index: + + * [x] show “limited map” warning + * [x] offer action: “Rebuild index with dataflow/control-flow enabled” (invokes `index build` with the project’s config expectations) + +--- + +### 4.6 Performance guardrails + scaling strategy (mandatory for real repos) + +This phase will generate *very large graphs* unless explicitly constrained.
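The determinism requirement below is the important part: if truncation ranks by degree and breaks ties on stable ids, the same inputs always keep the same nodes. A minimal sketch (`degree`, `maxFiles`, and the shape of `files` are hypothetical):

```js
// Sketch: deterministic "top-K by degree" truncation with a drop summary.
function selectTopFiles(files, maxFiles) {
  const ranked = [...files].sort(
    (a, b) => (b.degree - a.degree) || a.id.localeCompare(b.id) // stable tie-break
  );
  const kept = ranked.slice(0, maxFiles);
  const dropped = ranked.length - kept.length;
  return {
    kept,
    summary: dropped > 0 ? `truncated map: ${dropped} files dropped by degree rank` : null,
  };
}
```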
+ +* [x] Hard limits with user-overrides: + + * [x] `maxFiles`, `maxMembersPerFile`, `maxEdges` + * [x] edge sampling policies per edge type +* [x] Aggregation modes: + + * [x] Directory-level aggregation (folder nodes contain files) + * [x] File-only map (no nested functions) + * [x] Export-only functions view + * [x] “Top-K by degree” (highest call/import fan-in/out) +* [x] Deterministic sampling: + + * [x] same inputs → same output (stable selection) +* [x] Cache map builds keyed by: + + * [x] index signature + generator options +* [x] Failure mode policy: + + * [x] If size exceeds limits, output a “truncated map” plus a summary explaining what was dropped + +--- + +### 4.7 Tests (core + integration + determinism) + +Add explicit automated coverage for the map feature. + +#### Node tool tests (authoritative) + +* [x] `tests/code-map-basic.js` + + * [x] Build a tiny fixture repo with: + + * [x] imports/exports + * [x] functions calling other functions + * [x] a function with reads/writes/mutations/aliases + * [x] a function with branches/loops/throws/awaits + * [x] Run `build_index.js --stub-embeddings` + * [x] Run `pairofcleats report map --format json` + * [x] Assert: + + * [x] file nodes exist + * [x] member nodes include `signature/modifiers/returns/dataflow/controlFlow` + * [x] edge sets include imports + calls +* [x] `tests/code-map-dot.js` + + * [x] Generate DOT output + * [x] Assert: + + * [x] file “container” nodes exist + * [x] function rows/ports exist + * [x] edges connect to ports (caller fn → callee fn) + * [x] distinct edge styles appear for import vs call vs dataflow +* [x] `tests/code-map-determinism.js` + + * [x] Run map generation twice and compare outputs (ignore `generatedAt`) +* [x] `tests/code-map-guardrails.js` + + * [x] Generate a repo with many dummy functions + * [x] Ensure truncation behavior is correct and stable + +#### Plugin-side tests + +* [x] Python unit tests: + + * [x] command registration exists + * [x] subprocess args are correct for each map command + * [x] output paths computed correctly + * [x] “Graphviz missing” fallback behavior (DOT-only) works + + + +### 4.8 Isometric map viewer (three.js) + +* [x] Generate an isometric HTML viewer from the map model (three.js module import) +* [x] Support zoom with configurable sensitivity +* [x] Support WASD movement with configurable sensitivity/acceleration/drag +* [x] Highlight selections and show file/line metadata +* [x] Double-click opens the selected file/line via a URI template +* [x] Add layout styles (clustered/radial/flat) with adjustable spacing +* [x] Add flow-connected highlighting (edges + related nodes) and hover highlights from the selection panel +* [x] Add grid line rendering + glow, fog, and wireframe tuning (panel configurable) +* [x] Modularize the isometric viewer client into <500-line modules +--- + +## Phase 11 — Resource Lifecycle Management (Caches, Long-Lived Servers, Builds) + +**Objective:** Prevent memory and resource leaks in long-running processes (API server, service workers), especially across repeated builds and multi-repo usage. + +1. **Add eviction/TTL for API router repo-level caches** + + * [x] **Implement eviction for `repoCaches` map in `tools/api/router.js`.** + + * **Why:** `repoCaches` can grow unbounded if clients query multiple repos or if repo roots vary. Each entry can hold heavy caches (index cache + sqlite connections). 
+ * **Fix:** + + * Add: + + * `maxRepos` (e.g., 3–10) + * `repoTtlMs` (e.g., 10–30 minutes) + * Track `lastUsed` and evict least-recently-used / expired. + * On eviction: close sqlite cache handles (`sqliteCache.close()`), clear index cache. + * [x] Add metrics for cache size and evictions. + + * **Where:** `tools/api/router.js` and metrics registry. + +2. **Add eviction for per-repo index cache and sqlite DB cache** + + * [x] **Index cache eviction** + + * **Why:** `src/retrieval/index-cache.js` caches by `dir` (which can change per build). On repeated re-indexing, old build directories can accumulate. + * **Fix:** Convert to LRU with max entries, or TTL purge on access. + * [x] **SQLite DB cache eviction** + + * **Where:** `src/retrieval/sqlite-cache.js` + * **Why:** Same “dir-per-build” key pattern; can leak connections/handles. + * **Fix:** LRU/TTL + ensure `close()` called on eviction. + +3. **Add explicit cache invalidation when “current build” pointer changes** + + * [x] Detect when the effective index directory changes (new build) and prune caches for previous builds. + + * **Why:** Keeps hot caches relevant and bounds memory footprint. + +**Exit criteria** + +* [x] API server memory does not grow unbounded when indexing/searching multiple repos/builds. +* [x] Old build caches are evicted/pruned automatically. +* [x] SQLite handles are closed on eviction (verified via tests or instrumentation). + +--- + +## Phase 12 — Performance and Operational Hardening + +**Objective:** Improve throughput and robustness under load without changing core behavior. + +1. **Reduce event-loop blocking sync filesystem calls on API request paths** + + * [x] Replace `fsSync.*` in API request hot paths with async equivalents where practical. + + * **Why:** Sync I/O can stall concurrent requests in the API server process. + * **Where (examples):** + + * `tools/api/router.js` `resolveRepo()` uses `existsSync/statSync`. + * **Fix:** Use `fs.promises.stat` with try/catch; cache results briefly if needed. + +2. **Prevent decompression “zip bomb” style memory spikes in artifact reading** + + * [x] Add output size limiting to gzip decompression. + + * **Why:** `src/shared/artifact-io.js` uses `gunzipSync(buffer)` and only checks decompressed size *after* decompression. A small compressed file could expand massively and spike memory. + * **Fix:** + + * Use `zlib.gunzipSync(buffer, { maxOutputLength: maxBytes + slack })` (if supported in your Node target), or switch to streaming gunzip with explicit byte limits. + * **Where:** `src/shared/artifact-io.js` `parseBuffer` / gzip handling. + +3. **Add download size limits for tools that fetch large remote assets** + + * [x] Enforce maximum download size (or require hash) for dictionary downloads. + + * **Why:** `tools/download-dicts.js` buffers the entire response in memory (`Buffer.concat`) without a hard cap. + * **Fix:** Stream to disk with a cap; abort if exceeded; strongly prefer requiring hashes for non-default URLs. + +**Exit criteria** + +* [x] API request path avoids avoidable sync I/O. +* [x] Artifact gzip parsing cannot explode memory beyond configured limits. +* [x] Large downloads are bounded and/or verified. 
+ +--- + +## Phase 14 — Optional-dependency framework + capability registry (foundation for all phases) + +### 14.1 Introduce a consistent “optional dependency” loader + +* [x] Add `src/shared/optional-deps.js` with a single, opinionated API: + + * [x] `tryRequire(name)` / `tryImport(name)` helpers (use `createRequire(import.meta.url)` where needed) + * [x] Standardized return shape: `{ ok: true, mod } | { ok: false, error, reason }` + * [x] Standardized logging hook (only when `PAIROFCLEATS_VERBOSE` or a dedicated flag is enabled) +* [x] Add `src/shared/capabilities.js` that reports runtime availability: + + * [x] `watcher: { chokidar: true, parcel: boolean }` + * [x] `regex: { re2: boolean, re2js: true }` + * [x] `hash: { nodeRsXxhash: boolean, wasmXxhash: true }` + * [x] `compression: { gzip: true, zstd: boolean }` + * [x] `extractors: { pdf: boolean, docx: boolean }` + * [x] `mcp: { sdk: boolean, legacy: true }` + * [x] `externalBackends: { tantivy: boolean, lancedb: boolean }` (even if “boolean” means “reachable” rather than “installed”) +* [x] Wire capabilities into existing “status” surfaces: + + * [x] Extend `tools/mcp/repo.js` → `configStatus()` to include capability info and warnings for requested-but-unavailable features + * [x] Extend `tools/config-dump.js` (or equivalent) to print capabilities in JSON output mode + +### 14.2 Add config + env “backend selectors” (uniform UX) + +* [x] Extend `src/shared/env.js` to parse new selectors (string + allowlist): + + * [x] `PAIROFCLEATS_WATCHER_BACKEND` = `auto|chokidar|parcel` + * [x] `PAIROFCLEATS_REGEX_ENGINE` = `auto|re2|re2js` + * [x] `PAIROFCLEATS_XXHASH_BACKEND` = `auto|native|wasm` + * [x] `PAIROFCLEATS_COMPRESSION` = `auto|gzip|zstd|none` + * [x] `PAIROFCLEATS_DOC_EXTRACT` = `auto|on|off` + * [x] `PAIROFCLEATS_MCP_TRANSPORT` = `auto|sdk|legacy` +* [x] Add parallel config keys in `.pairofcleats.json` (keep them near existing related config blocks): + + * [x] `indexing.watch.backend` + * [x] `search.regex.engine` + * [x] `indexing.hash.backend` + * [x] `indexing.artifactCompression.mode` enum expansion + `auto` + * [x] `indexing.documentExtraction.enabled` + * [x] `mcp.transport` +* [x] Update `docs/config-schema.json`: + + * [x] Add/expand enums (avoid “free string” for anything that’s meant to be policy-controlled) + * [x] Add descriptions that clarify fallback rules (`auto` behavior) +* [x] Update any config validation code paths if they enforce known keys (`src/config/validate.js` is schema-driven; keep schema authoritative) + +### 14.3 Add dependency-bundle reference stubs (keeps repo documentation consistent) + +For each new dependency introduced in later phases, add a minimal doc file under: +`docs/references/dependency-bundle/deps/<dep-name>.md` + +* [x] `parcel-watcher.md` +* [x] `re2.md` +* [x] `node-rs-xxhash.md` +* [x] `mongodb-js-zstd.md` +* [x] `pdfjs-dist.md` +* [x] `mammoth.md` +* [x] `modelcontextprotocol-sdk.md` +* [x] `lancedb.md` (if used) +* [x] `tantivy.md` (if used) +* [x] Update `docs/references/dependency-bundle/README.md` if it has an index + +### 14.4 Tests (framework-level) + +* [x] Add `tests/capabilities-report.js`: + + * [x] Asserts `capabilities` object shape is stable + * [x] Asserts `auto` selectors never throw when optional deps are missing +* [x] Add a script-coverage action to run it: + + * [x] `tests/script-coverage/actions.js`: add action entry that calls `runNode(...)` + * [x] (Optional) Add an npm script alias if you want parity with the rest of the repo scripts + +**Exit criteria** + +* [x] All
“capability” calls are side-effect-free and safe when optional deps are absent +* [x] `config_status` (MCP) can surface “you requested X but it’s not available” warnings without crashing +* [x] CI passes on Node 18 (Ubuntu + Windows lanes) + +--- + +## Phase 15 — File watching performance: add `@parcel/watcher` backend (keep chokidar fallback) + +### 15.1 Add the dependency (prefer optional unless you want it guaranteed everywhere) + +* [x] Add `@parcel/watcher` to `package.json` + + * [x] Prefer `optionalDependencies` if you want installs to succeed even when native builds fail + * [x] If you add it as a hard dependency, ensure Windows CI remains green + +### 15.2 Create a watcher-backend abstraction + +* [x] Create `src/index/build/watch/backends/types.js` (or inline JSDoc contract) describing: + + * [x] `start({ root, ignored, onEvent, onError, pollMs? }) -> { close(): Promise }` + * [x] Normalized event shape: `{ type: 'add'|'change'|'unlink', absPath }` +* [x] Extract chokidar wiring out of `src/index/build/watch.js`: + + * [x] Move into `src/index/build/watch/backends/chokidar.js` + * [x] Preserve existing semantics (`awaitWriteFinish`, ignored matcher, poll support) +* [x] Implement parcel watcher backend: + + * [x] New file: `src/index/build/watch/backends/parcel.js` + * [x] Map parcel events to the normalized `{type, absPath}` model + * [x] Decide how to handle rename/move (often appears as unlink+add): + + * [x] If parcel reports rename, still emit unlink+add for compatibility with current scheduling + * [x] Implement “poll” behavior: + + * [x] If poll mode is requested, either: + + * [x] force chokidar with polling, **or** + * [x] implement a cheap stat-based poller wrapper (only if needed) + * [x] Implement “write stability” guard: + + * [x] Chokidar has `awaitWriteFinish`; parcel does not in the same way + * [x] Add a “stabilize file” check in the pipeline: before processing a file, optionally confirm `mtime/size` stable across N ms + * [x] Place this in `createDebouncedScheduler()` or immediately before `enqueueOrUpdate()` in `file-processor.js` (prefer a single shared guard) + +### 15.3 Wire selection into `watchIndex()` + +* [x] Update `src/index/build/watch.js`: + + * [x] Choose backend via (in order): CLI/config → env → `auto` capability + * [x] Log selected backend once at startup (only if verbose or `--watch`) + * [x] Ensure `pollMs` is still honored (either by backend or by selection logic) + +### 15.4 Tests + +* [x] Add `tests/watch-backend-selection.js`: + + * [x] Forces `PAIROFCLEATS_WATCHER_BACKEND=chokidar` and asserts no parcel import occurs + * [x] Forces `...=parcel` and asserts fallback behavior if module unavailable (no crash, warning path) +* [x] Add `tests/watch-stability-guard.js`: + + * [x] Simulate “partial write” (write file in two chunks with delay) and assert processor waits/defers correctly + * [x] Keep the test deterministic: use explicit timeouts and a temp directory under `tests/.cache` +* [x] Add corresponding script-coverage actions in `tests/script-coverage/actions.js` + +**Exit criteria** + +* [x] `pairofcleats index watch` remains correct on Windows and Linux +* [x] No regressions in ignore behavior (still uses `buildIgnoredMatcher`) +* [x] Event storms do not cause repeated redundant rebuilds (existing debounce logic preserved) + +--- diff --git a/COMPLETE_PLAN.md b/COMPLETE_PLAN.md deleted file mode 100644 index c08839a4b..000000000 --- a/COMPLETE_PLAN.md +++ /dev/null @@ -1,466 +0,0 @@ -# Complete Plan - -This document consolidates all 
phase docs and tracks implementation status. Phase markdown files are removed after merge; this is the single source of truth. - -## Status key -- done: implemented and validated -- partial: implemented with known gaps or follow-ups -- todo: not implemented -- in-progress: actively being implemented - -## Baseline goals (status: done) -- [x] Per-repo indexing with a central cache outside the repo. -- [x] On-demand indexing with incremental caching and optional CI artifacts. -- [x] MCP server interface for status/build/search/model download. -- [x] Non-git repos supported with a strong recommendation to use git. - -## Cache layout (status: done) -- /repos//index-code/ -- /repos//index-prose/ -- /repos//incremental/ -- /repos//repometrics/ -- /repos//index-sqlite/index-code.db -- /repos//index-sqlite/index-prose.db -- /models/ -- /extensions/ - -Repo identity: -- Hash the absolute repo path (run from repo root for stable IDs). -- Git metadata is captured separately for status/reporting. - -SQLite location: -- Override with `sqlite.dbDir` or `codeDbPath`/`proseDbPath`. -- Point `sqlite.dbDir` at `index-sqlite` to keep DBs in the repo. - -## Model download and bootstrap (status: done) -- [x] Detect model availability in MCP status and provide a download_models hint. -- [x] Provide download helper (node) and bootstrap path. - -## Git handling (status: done) -- [x] Warn when git is missing and continue without git metadata. -- [x] Store commit hash and dirty flag when git is present. - -## MCP surface (status: done) -- [x] index_status(repoPath) -- [x] build_index(repoPath, mode=all, incremental=true) -- [x] search(repoPath, query, filters...) -- [x] download_models() -- [x] report_artifacts() - -## Phase 2: SQLite Candidate Generation (status: done) -Goal: Use SQLite to generate candidate sets while keeping scoring/rendering in JS. -Work items: -- [x] Candidate set creation via token, phrase, and chargram tables. -- [x] BM25 stats sourced from SQLite (doc_lengths + token_stats). -- [x] Fallback to file-backed artifacts when SQLite is missing or incomplete. -- [x] Docs updated to describe SQLite candidate generation. -Notes: -- Query tokenization remains in search.js; SQLite provides candidates only. -- Dense vectors and minhash are still JS-side. - -## Phase 3: Parity + Performance Validation (status: done) -Goal: Validate SQLite vs file-backed parity and capture baseline metrics. -Work items: -- [x] Parity harness (tests/parity.js) with overlap and score deltas. -- [x] Query set in tests/parity-queries.txt. -- [x] Report output (docs/phase3-parity-report.json). -- [x] Benchmark harness (tests/bench.js) for latency and artifact sizes. - -## Phase 4: Incremental Indexing (status: done) -Goal: Reuse per-file bundles to avoid re-embedding unchanged files. -Work items: -- [x] Per-file cache manifest and bundles outside the repo. -- [x] Incremental build path in build_index.js. -- [x] SQLite incremental updates in tools/build-sqlite-index.js. -- [x] Incremental tests (tests/sqlite-incremental.js). -Notes: -- Global postings are rebuilt from cached bundles (not in-place deltas for file-backed JSON). - -## Phase 5: CI Artifact Generation + Detection (status: done) -Goal: Build and restore index artifacts in CI. -Work items: -- [x] Build script (tools/ci-build-artifacts.js) with manifest output. -- [x] Restore script (tools/ci-restore-artifacts.js) with commit checks. -- [x] Bootstrap restore when ci-artifacts/manifest.json exists. -- [x] Docs for GitHub and GitLab usage. 
- -## Phase 6: Tests + Benchmarks (status: done) -Goal: Expand deterministic tests and perf harnesses. -Work items: -- [x] Fixture repos under tests/fixtures (sample, mixed). -- [x] Fixture smoke, parity, eval harnesses. -- [x] Bench harness (tests/bench.js) + bench-ann script. -- [x] Query cache, cleanup, uninstall, sqlite incremental/compact, mcp server tests. -- [x] Add CI workflow to run smoke + parity in GitHub Actions. - -## Phase 7: Language Expansion (status: done) -Goal: Provide stable chunking + metadata for prioritized languages. - -Python (status: done) -- [x] Python AST enrichment when python is available; heuristic fallback. -- [x] Class/function/method chunking with docstrings and signatures. -- [x] Improve call graph accuracy for nested functions. -- [x] Add type-aware docs for dataclasses/attrs. - -Swift (status: done) -- [x] Brace-aware chunking for declarations. -- [x] Doc comment extraction and signature metadata. -- [x] Improve parsing of generics and extensions. - -ObjC/C/C++ (status: done) -- [x] Regex-driven chunking for C-family and ObjC blocks. -- [x] Selector extraction for ObjC methods. -- [x] Improve call graph and include resolution heuristics. - -Rust (status: done) -- [x] Heuristic chunking for structs/enums/traits/mods/impls/fns. -- [x] Basic metadata extraction and imports/exports. -- [x] Improve macro-heavy parsing and impl block method grouping. - -## Phase 7b: AST Completion Passes (status: done) -Goal: Extend AST-backed languages to a "complete" metadata and dataflow feature set. -Work items: -- [x] Define and document the AST feature list and per-language coverage. -- [x] JS AST: signatures/params/modifiers/inheritance + dataflow (reads/writes/mutations/throws/awaits/yields). -- [x] Python AST: signatures/params/types/bases/modifiers + dataflow (reads/writes/mutations/throws/awaits/yields/globals). -- [x] Configurable AST dataflow extraction (default on). -- [x] Add fixtures + language-fidelity assertions for AST metadata. - -## Phase 8: SQLite Scoring (FTS5) + ANN Extension (status: done) -Goal: Optional SQLite-only sparse ranking plus optional vector extension for ANN. -Work items: -- [x] FTS5 ranking path (sqlite-fts backend) with shared renderer. -- [x] Configurable FTS5 weighting and optional normalization. -- [x] ANN extension support (sqlite-vec) with loadable binary. -- [x] Archive download support for extension binaries (zip/tar/tgz). -- [x] ANN extension test harness (tests/sqlite-ann-extension.js). - -## Phase 9: Scoring Calibration (status: done) -Goal: Deterministic ranking and tunable BM25 parameters. -Work items: -- [x] Deterministic tie-breakers in ranking and merging. -- [x] Configurable BM25 parameters (search.bm25.k1/b). -- [x] Documentation for tuning and parity expectations. - -## Phase 10: SQLite Split (status: done) -Goal: Split code/prose DBs to reduce lock contention. -Work items: -- [x] index-code.db and index-prose.db layout. -- [x] Build/search use split DBs. -- [x] CI artifacts handle split DBs. -- [x] Legacy index.db cleanup. - -## Phase 11: Parallel Indexing (status: done) -Goal: Parallel file processing with deterministic ordering. -Work items: -- [x] File worker pool with deterministic output ordering. -- [x] Separate concurrency for import scanning. -- [x] Configurable concurrency via .pairofcleats.json and CLI. - -## Phase 12: MCP Server Packaging (status: done) -Goal: MCP stdio server for index lifecycle and search. -Work items: -- [x] JSON-RPC 2.0 server with content-length framing. 
-- [x] Tools: index_status/build_index/search/download_models/report_artifacts. -- [x] Git-optional behavior with warnings. - -## Phase 13: Language Fidelity Review + Enhancements (status: done) -Goal: Evaluate current fidelity of each supported language and enhance parsing. -Work items: -- [x] Build a per-language evaluation checklist (chunking, metadata, relations). -- [x] Expand fixtures per language and add targeted regression tests. -- [x] Implement improvements per language and update docs. - -## Phase 14: CI Coverage and Full Script Coverage (status: done) -Goal: Ensure every npm script is exercised and documented. -Work items: -- [x] Add CI workflow for smoke + parity + core harnesses. -- [x] Add a meta-test runner that exercises all scripts (with stub embeddings). -- [x] Record expected runtime and platform constraints. - -## Phase 15: New Languages and Features (status: done) -Goal: Add new languages and new indexing/search features after baseline completion. -Work items: --- [x] Add Go support (chunking + metadata + relations + fixtures + tests). --- [x] Add Java support (chunking + metadata + relations + fixtures + tests). --- [x] Add Perl (lite) support for comedy coverage (chunking + minimal metadata). --- [x] Add Shell (lite) support (chunking + minimal metadata + fixtures + tests). --- [x] Add AST-based dataflow metadata (reads/writes/mutations/throws/awaits/yields). --- [x] Add search filters for AST metadata (decorators/modifiers/returns/throws/reads/writes/mutations/extends/visibility). --- [x] Render AST metadata in human output. --- [x] Update docs and tests for each addition. - -## Phase 16: Unified Parsing + Tooling Bootstrap (status: done) -Goal: Centralize parsing where possible while keeping native parsers for stable languages, and add tooling detection/install support. -Work items: -- [x] Choose and document a unified parser backbone (tree-sitter) plus native parser mapping for JS/Python. -- [x] Add tooling detection + install scripts with cache-local default installs and optional normal installs. -- [x] Add config: tooling.autoInstallOnDetect, tooling.installScope, tooling.allowGlobalFallback. -- [x] Update bootstrap to detect languages and auto-install tooling when configured. -- [x] Add tests for tooling detection/install logic (stubbed where needed). - -## Phase 17: Format Coverage Expansion (status: done) -Goal: Add rich chunking/metadata for common config and docs formats. -Work items: -- [x] Add JSON/TOML/INI/XML parsers and chunking rules. -- [x] Add Dockerfile/Makefile parsing and chunking rules. -- [x] Add GitHub Actions YAML parsing (workflow/job/step chunks). -- [x] Add RST and AsciiDoc heading/section chunking. -- [x] Update fixtures, language-fidelity checklist, and docs for formats. - -## Phase 18: Language Expansion (status: done) -Goal: Add baseline parsing/chunking/relations for new languages with the unified backbone. -Work items: -- [x] TypeScript baseline heuristic chunking + metadata (native TS parser integration deferred). -- [x] C# baseline heuristic chunking + metadata (tree-sitter/LSP enrichment deferred). -- [x] Kotlin baseline heuristic chunking + metadata (tree-sitter/LSP enrichment deferred). -- [x] Ruby baseline heuristic chunking + metadata (tree-sitter/LSP enrichment deferred). -- [x] PHP baseline heuristic chunking + metadata (tree-sitter/LSP enrichment deferred). -- [x] Lua baseline heuristic chunking + metadata (tree-sitter/LSP enrichment deferred). 
-- [x] SQL baseline statement chunking + metadata (dialect parsing in Phase 19). -- [x] Add fixtures and language-fidelity assertions for each. -Notes: -- Tree-sitter/native parser enrichment remains planned alongside Phase 19-22 work. - -## Phase 19: SQL Dialect Parsing (status: done) -Goal: Provide dialect-aware SQL parsing and metadata. -Work items: -- [x] Add PostgreSQL/MySQL/SQLite dialect selection rules (extension + override). -- [x] Add per-dialect fixtures and tests. -- [x] Add config for sql.dialect and dialect-by-extension mapping. - -## Phase 20: CFG + Dataflow Everywhere (status: done) -Goal: Add control-flow graphs and dataflow metadata across supported languages. -Work items: -- [x] Define shared CFG/dataflow schema in docs/ast-feature-list.md. -- [x] Implement CFG/dataflow for C/C++/ObjC, Rust, Go, Java, Shell. -- [x] Reuse shared engine for JS/Python where applicable. -- [x] Add filters and output rendering for CFG/dataflow metadata. -- [x] Expand fixtures/tests to validate control-flow and dataflow fields. -- [x] Evaluate dynamic language handler imports (pros/cons, perf, DX). - -## Phase 21: Type Inference (Intra-file) (status: done) -Goal: Add local type inference for each supported language. -Work items: -- [x] Implement intra-file inference for literals, annotations, and symbol tables. -- [x] Merge inferred types into docmeta and render/filter paths. -- [x] Validate with fixtures and language-fidelity tests. - -## Phase 22: Type Inference (Cross-file) (status: done) -Goal: Resolve types across files after intra-file stability is confirmed. -Work items: -- [x] Add cross-file symbol resolution and import/usage linking. -- [x] Use detected tooling when present for richer type info. -- [x] Validate with tests; provide parity/perf summary after completion. -Notes: -- Cross-file inference is covered by `tests/type-inference-crossfile.js`; large-repo perf runs are still pending. - -## Phase 23: Unified Setup Command (status: done) -Goal: Provide a single guided command that bundles optional setup steps. -Work items: -- [x] Add a guided setup script that can install deps, dictionaries, models, extensions, tooling, and build indexes. -- [x] Support prompts when defaults fail or when optional tooling is detected. -- [x] Provide non-interactive flags for CI usage. -- [x] Document and add tests for the unified setup flow. - -## Maintenance / Refactor Guardrails (status: done) -- [x] Break `build_index.js` into focused modules (discovery/import scan/file processing/posting builders/artifact writers/metrics) to keep growth in check. - -## Deferred / Do Not Surface (status: deferred) -- [ ] Evaluate FTS5 vs BM25 parity on larger benchmarks and retune weights. - - Do not prioritize or bring this up unless explicitly requested. - -## Phase 24: Indexing Core Reliability (status: done) -- [x] Fix chunk weight wiring (`weightt` typo) and add a regression test for weight effects. -- [x] Use precomputed token frequencies in BM25 row building; remove unused `wordFreq`/`sparse` artifacts if they remain unused. -- [x] Add a config option to disable per-chunk `git blame` (or downgrade to file-level) for large repos. -- [x] Add empty-repo/zero-chunk coverage to ensure postings/metrics stay stable. - -## Phase 25: Language Parsing Hardening (status: done) -- [x] Improve TypeScript import parsing for multi-line imports/exports and dynamic `import()` calls. -- [x] Add JSX/Stage-3 parsing support (espree or tree-sitter) to avoid fallback chunking in `.jsx/.tsx`. 
-- [x] Extend cross-file inference beyond TS (Go/Rust/Java via tooling hooks). -- [x] Add fixtures/tests for `.tsx/.mts/.cts` and Python AST fallback. - -## Phase 26: Search + Scoring Consistency (status: done) -- [x] Unify MinHash implementation between indexing and search; add a compatibility test. -- [x] Decide on `sparse_postings_varint.bin`: consume it or remove it from outputs. -- [x] Add caching for search summaries and unify shared CLI/output code with sqlite search. -- [x] Expand filter coverage tests (return types, inferred types, returns/async flags). - -## Phase 27: SQLite Incremental Safety (status: done) -- [x] Validate schema version before incremental updates and force rebuild when mismatched. -- [x] Detect embedding model changes (id/dims) and rebuild or re-ingest dense vectors. -- [x] Add optional vocab pruning/compaction for long-lived incremental DBs. -- [x] Add tests for schema mismatch and vector-ann table sync after deletions. - -## Phase 28: Tooling + Cache UX (status: done) -- [x] Make `clean-artifacts --all` preserve models/dicts or add keep flags aligned with uninstall behavior. -- [x] Add `setup --json` summary output for CI automation. -- [x] Add Node-based archive extraction fallback for extension downloads. -- [x] Deduplicate shared helper logic across setup/bootstrap/clean/uninstall scripts. - -## Phase 29: MCP + Docs Quality (status: done) -- [x] Refresh `ROADMAP.md` or mark it as historical to avoid contradicting `COMPLETE_PLAN.md`. -- [x] Add async MCP build support (stream output vs `spawnSync`) and document error payloads. -- [x] Add MCP error-path tests (invalid repo path, missing indexes). -- [x] Add a docs consistency test to catch stale plan/roadmap references. - -## Phase 30: Scoring + JSON Consolidation (status: done) -Goal: Standardize scoring outputs across backends and make JSON payloads consistent and inspectable. -Work items: -- [x] Align score labels and semantics across memory/sqlite/sqlite-fts paths (including ANN fallback). -- [x] Add score breakdowns (BM25/FTS/ANN components, normalization flags, weights). -- [x] Ensure `--json-compact` preserves the same fields across backends and filters. -- [x] Update compare/parity harnesses to consume the unified score schema. -- [x] Add targeted tests for score breakdown parity. -Notes: -- Enhancement thread 1 (scoring transparency) is implemented here. - -## Phase 31: Index Pipeline Pluginization (status: done) -Goal: Replace large conditional flows with a registry-based indexing pipeline. -Work items: -- [x] Build a per-language/format registry for scanners, parsers, and enrichers. -- [x] Centralize shared helpers (tokenize, metadata normalization, relations). -- [x] Reduce build_index control flow into steps with explicit inputs/outputs. -- [x] Add fixtures/tests for registry ordering and missing-handler fallbacks. -Notes: -- Enhancement thread 3 (parser SDK) is implemented here. - -## Phase 32: Language Semantics Depth (status: done) -Goal: Improve type inference, control flow, and dataflow richness with interprocedural context. -Work items: -- [x] Expand intra-file type inference precision (literal unions, generics, propagation). -- [x] Add interprocedural summaries (callsite argument/return linking). -- [x] Extend dataflow with alias tracking for supported languages. -- [x] Add fidelity fixtures covering new semantic edges. -Notes: -- Enhancement thread 2 (language semantics) is implemented here. 
- -## Phase 33: Continuous Indexing (status: done) -Goal: Support live updates via watchers and git hooks with safe concurrency. -Work items: -- [x] Add a watch mode to trigger incremental indexing on file changes. -- [x] Add optional git hook installers (post-commit / post-merge). -- [x] Add lock/health checks to avoid concurrent writes. -- [x] Document workflows for CI and local dev. -Notes: -- Enhancement thread 4 (continuous update loop) is implemented here. - -## Phase 34: Artifact Lifecycle + Cache Hygiene (status: done) -Goal: Manage cache size, retention, and shared artifacts safely. -Work items: -- [x] Add cache quota and GC policy (age/size-based eviction). -- [x] Add artifact health checks and cold-cache rebuild hints. -- [x] Expand report-artifacts with per-repo and global rollups. -- [x] Add tests for GC and quota handling. -Notes: -- Enhancement thread 5 (cache/artifact hygiene) is implemented here. - -## Phase 35: MCP UX Enhancements (status: done) -Goal: Make MCP interactions richer, safer, and more transparent. -Work items: -- [x] Stream progress for long-running MCP tasks (index build, download). -- [x] Add remediation hints on common errors (missing models/dicts/sqlite). -- [x] Add MCP tool to inspect config + cache status with warnings. -- [x] Add MCP-focused tests for error and progress payloads. -Notes: -- Enhancement thread 6 (MCP UX) is implemented here. - -## Phase 36: Agent-Focused SAST Features (status: done) -Goal: Provide lightweight risk signals and flows for agent workflows. -Work items: -- [x] Add taint-like flow summaries for sources/sinks (configurable). -- [x] Add risky API usage detectors with metadata tags. -- [x] Add search filters for risk categories and flows. -- [x] Add fixtures/tests for sample flows. -Notes: -- Enhancement thread 7 (SAST-adjacent) is implemented here. - -## Phase 37: Triage Records + Context Packs (Phase 0: spec review + plan) (status: done) -Goal: Review the v1 triage spec, map touched systems, and capture assumptions for a safe rollout. -Work items: -- [x] Review newfeature.md and current build/search/config flows to map integration points. -- [x] Confirm cache-only storage for triage artifacts (no repo writes). -- [x] Document assumptions and guardrails before implementation. -Assumptions/guardrails: -- Keep `build_index --mode all` semantics as code+prose only; records are opt-in via `--mode records`. -- Triage records live under the repo cache by default; no triage data written to the repo tree. -- Promote only selected fields into `docmeta.record` to avoid bloating chunk metadata. -- Record indexing can be a full rebuild in v1 (expected low volume); incremental support is optional. -- Meta filtering uses case-insensitive matching and ignores missing fields rather than erroring. -- Context packs can invoke `search.js` via a child process in v1 (no core search refactor required). - -## Phase 38: Triage Records + Context Packs (Phase 1: config + paths + schema) (status: done) -Goal: Add triage config and path resolution, plus shared helpers for stable record IDs. -Work items: -- [x] Add `triage` config defaults to `.pairofcleats.json` and config loaders. -- [x] Extend `tools/dict-utils.js` with `getTriageRecordsDir()` and allow `getIndexDir(..., 'records')`. -- [x] Define shared helpers for recordId generation and promoted field extraction. 
- -## Phase 39: Triage Records + Context Packs (Phase 2: ingest + normalize + render + decisions) (status: done) -Goal: Ingest findings into normalized records and render human/indexable views. -Work items: -- [x] Implement `tools/triage/ingest.js` with Dependabot, AWS Inspector, and generic adapters. -- [x] Add normalization modules in `src/triage/normalize/` with parse warnings and metadata routing. -- [x] Add `src/triage/render.js` to render canonical markdown views. -- [x] Implement `tools/triage/decision.js` to create decision records linked to findings. - -## Phase 40: Triage Records + Context Packs (Phase 3: records indexing) (status: done) -Goal: Build a dedicated records index with prose-style tokenization and optional incremental caching. -Work items: -- [x] Allow `--mode records` in build args and route to a new records indexer. -- [x] Add `src/triage/index-records.js` to build `index-records` from record markdown + JSON. -- [x] Store promoted fields in `docmeta.record` and keep artifacts small. - -## Phase 41: Triage Records + Context Packs (Phase 4: records search + meta filters) (status: done) -Goal: Enable records search with metadata-first filtering and JSON output support. -Work items: -- [x] Extend `search.js` to include `--mode records` and optional `--meta`/`--meta-json`. -- [x] Add record output section and JSON `records` payloads in `src/search/output.js`. -- [x] Add generic file/ext filters if not already present and apply them to records. - -## Phase 42: Triage Records + Context Packs (Phase 5: context packs + MCP + tests + docs) (status: done) -Goal: Produce LLM-ready context packs, expose MCP tools, and add tests/fixtures/docs. -Work items: -- [x] Implement `tools/triage/context-pack.js` (history + repo evidence). -- [x] Add MCP tool wrappers for ingest/decision/context packs and allow `records` mode in MCP build/search. -- [x] Add triage fixtures + `tests/triage-records.js` and script wiring in `package.json`. -- [x] Update README + docs to describe triage workflows and new CLI/MCP tools. - -## Phase 43: Prioritized Issues - P0 Correctness (status: done) -Goal: Fix correctness issues and broken/unused CLI behavior. -Work items: -- [x] Fix `--churn` CLI parsing, numeric thresholds, cache keys, and docs. -- [x] Replace churn metric with git numstat-based churn; add tests. -- [x] Fix Unicode offset drift between indexing and rendering; add fixture test. -- [x] Remove or implement build `--chunk` option; update docs/tests. -- [x] Enable GitHub Actions workflows under `.github/workflows` with CI. - -## Phase 44: Prioritized Issues - P1 High ROI (status: done) -Goal: Bring MCP/CLI parity and improve indexing robustness. -Work items: -- [x] Expand MCP `search` filters to CLI parity and default to `--json-compact`. -- [x] Add MCP ops tools for download/build/maintain workflows. -- [x] Add `--path` alias filter and ensure CLI/MCP path/ext filters are consistent. -- [x] Auto-detect repo root for CLI/tools; add `--repo` overrides. -- [x] Add file-size guardrails with skip/partial index reporting. -- [x] Graceful shutdown for watch mode with lock cleanup. - -## Phase 45: Prioritized Issues - P2 Enhancements (status: done) -Goal: Improve search UX and reduce index footprint. -Work items: -- [x] Add negative terms and quoted phrases to query parsing. -- [x] Add modified-since/after filters (git-aware recency). -- [x] Add chunk-author filter and output rendering. -- [x] Make chargram/phrase-ngrams configurable and handle missing artifacts. 
-- [x] Clarify score fields (`score`, `annScore`, `scoreBreakdown`) in JSON + docs. -- [x] Remove redundant `call` vs `calls` filtering path. - -## Phase 46: Prioritized Issues - P3 Maintainability (status: done) -Goal: Improve packaging, configuration safety, and testability. -Work items: -- [x] Add `pairofcleats` CLI entrypoint with subcommands. -- [x] Add config schema + validation command. -- [x] Pin dependency versions (remove `*`) and document policy. -- [x] Refactor `search.js` into modules for testability. diff --git a/GIGAROAD/ROADMAP.md b/GIGAROAD/ROADMAP.md new file mode 100644 index 000000000..d920d561d --- /dev/null +++ b/GIGAROAD/ROADMAP.md @@ -0,0 +1,5430 @@ +## Phase 1 — Sublime Text 3 Plugin Foundation (Parity + Plumbing) + +### 1.1 Plugin repo structure + packaging + +* [ ] Create `sublime/PairOfCleats/` package skeleton: + + * [ ] `PairOfCleats.py` (entrypoint) + * [ ] `commands/` (command modules) + * [ ] `lib/` (helpers: config, subprocess, parsing, caching) + * [ ] `messages/` (install/upgrade notes) + * [ ] `Default.sublime-commands` + * [ ] `Main.sublime-menu` (optional) + * [ ] `Default.sublime-keymap` (optional) +* [ ] Add `README.md` for ST3 plugin installation + prerequisites +* [ ] Add “Package Control” compatibility notes (no external deps beyond Node runtime + repo binaries) + +### 1.2 Node/CLI discovery + execution contract + +* [ ] Implement robust “pairofcleats binary discovery”: + + * [ ] Prefer project-local `node_modules/.bin/pairofcleats` when available + * [ ] Fallback to global `pairofcleats` on PATH + * [ ] Allow explicit override in ST settings: `pairofcleats_path` +* [ ] Implement repo-root detection: + + * [ ] Prefer `.pairofcleats.json` location + * [ ] Fallback to `.git` root + * [ ] Fallback to folder of active file +* [ ] Implement subprocess wrapper: + + * [ ] Streams output to Sublime panel + * [ ] Captures JSON payloads when `--json` is used + * [ ] Supports cancellation (best-effort) + * [ ] Adds stable environment injection (cache root, embeddings mode, etc.) + +### 1.3 Settings + per-project overrides + +* [ ] Add `PairOfCleats.sublime-settings` defaults: + + * [ ] `pairofcleats_path`, `node_path` + * [ ] `index_mode_default` (code/prose/both) + * [ ] `search_backend_default` (memory/sqlite-fts/etc) + * [ ] `open_results_in` (quick_panel / new_tab / output_panel) +* [ ] Support `.sublime-project` settings overrides +* [ ] Validate config and surface actionable error messages + +### 1.4 Smoke tests (plugin-side) + +* [ ] Add Python unit tests that: + + * [ ] Import plugin modules without Sublime runtime (mock `sublime`, `sublime_plugin`) + * [ ] Validate binary discovery behavior + * [ ] Validate repo-root resolution on fixtures + * [ ] Validate settings overlay precedence + +--- + + +## Phase 2 — Sublime Search UX (Queries, Results, Navigation) + +### 2.1 Search command(s) + +* [ ] `PairOfCleats: Search` command: + + * [ ] Prompt input panel for query + * [ ] Optional toggles: code/prose/both, backend, limit + * [ ] Execute `pairofcleats search ... 
--json` +* [ ] `PairOfCleats: Search Selection` command: + + * [ ] Uses selected text as query +* [ ] `PairOfCleats: Search Symbol Under Cursor` command + +### 2.2 Results presentation + +* [ ] Quick panel results: + + * [ ] Show `file:line-range`, symbol name, snippet/headline, score + * [ ] Preserve stable ordering for repeatability +* [ ] On selection: + + * [ ] Open file at best-effort location (line/column) + * [ ] Highlight match range (if available) +* [ ] Add optional “results buffer” view (for large result sets) + +### 2.3 Quality-of-life UX + +* [ ] Query history (per project) +* [ ] “Repeat last search” command +* [ ] “Explain search” (if supported by CLI flags / internal explain output) + +### 2.4 Tests + +* [ ] Add Node-level “search contract” tests: + + * [ ] Ensure `--json` output parseability and required fields +* [ ] Add plugin tests: + + * [ ] Search command dispatches correct subprocess args + * [ ] Results parsing tolerates partial/missing optional fields + +--- + + +## Phase 3 — Index Lifecycle in Sublime (Build/Watch/Validate + Status) + +### 3.1 Build index commands + +* [ ] `PairOfCleats: Index Build (Code)` +* [ ] `PairOfCleats: Index Build (Prose)` +* [ ] `PairOfCleats: Index Build (All)` +* [ ] Stream progress to an output panel +* [ ] Persist “last index time” + “last index mode” in project cache + +### 3.2 Watch mode integration + +* [ ] `PairOfCleats: Index Watch Start` +* [ ] `PairOfCleats: Index Watch Stop` +* [ ] Prevent duplicate watchers per window/project +* [ ] Robust shutdown on Sublime exit / project close + +### 3.3 Validate + repair affordances + +* [ ] `PairOfCleats: Index Validate` +* [ ] Surface actionable failures (missing artifacts, invalid JSON, stale manifests) +* [ ] Provide “Open index directory” convenience command + +### 3.4 Tests + +* [ ] Node tests for index build/validate on fixtures +* [ ] Plugin tests for lifecycle commands and watcher gating + +--- + + +## Phase 4 — Codebase Semantic Map (Imports/Exports/Calls/Dataflow/Control Flow → Visual Map) + +### What this phase delivers + +A **real codebase map** that uses existing and enriched semantic metadata to generate a **diagram-ready model** and one or more **rendered artifacts**. + +It must explicitly incorporate and visualize: + +* **Imports / Exports / ImportLinks** +* **Calls / CallLinks / CallSummaries** +* **Usages / UsageLinks** +* **Signature / Modifiers / Params / Returns** +* **Reads / Writes / Mutates / Aliases** +* **Control flow** (branches, loops, throws, awaits, yields, returns) +* **AST-derived semantics** (using what the indexer already extracts) + +#### Visual grammar (required characteristics) + +* **File = outer shape** + + * Shape varies by file type/category (source/test/config/doc/generated/etc.) 
+* **Functions/classes = content inside the file shape**
+
+  * The “fill” of the file node is structurally subdivided to represent contained functions/classes
+* **Function details = nested sub-shapes inside function area**
+
+  * Small badges/segments represent modifiers/returns/dataflow/control-flow
+* **Multiple line styles = multiple edge semantics**
+
+  * Imports (file→file), control flow calls (fn→fn), usage deps (fn→fn), dataflow (arg/return/state)
+
+---
+
+### 4.1 Inventory + normalize available semantics from existing artifacts
+
+Leverage what is already produced today, and formalize how it’s consumed:
+
+* [ ] **Inputs** (expected present after `index build`):
+
+  * [ ] `file_relations.json` (imports, exports, usages, importLinks, functionMeta/classMeta)
+  * [ ] `repo_map.json` (chunk-level symbol map, exported flag, signatures)
+  * [ ] `chunk_meta.json` (docmeta/metaV2: signature/modifiers/returns/controlFlow/dataflow + relations)
+  * [ ] `graph_relations.json` (importGraph/callGraph/usageGraph)
+* [ ] Define “canonical IDs” used across the map:
+
+  * [ ] `fileId = <filePath>`
+  * [ ] `symbolId = <filePath>::<symbolName>` (already used in relation graphs)
+  * [ ] Stable IDs for anonymous/lambda cases (fallback: chunkId when name is `(anonymous)`)
+
+---
+
+### 4.2 Define a versioned “Map Model” schema (diagram-ready)
+
+This is the core contract the plugin will consume.
+
+* [ ] Create `docs/map-schema.json` (or similar) with:
+
+  * [ ] `version`
+  * [ ] `generatedAt`
+  * [ ] `root` (repo root logical id)
+  * [ ] `legend`:
+
+    * [ ] `nodeTypes` (file/function/class/symbol)
+    * [ ] `fileShapes` mapping (category → shape)
+    * [ ] `functionBadges` mapping (modifier/returns/dataflow/control-flow → badge glyph)
+    * [ ] `edgeTypes` mapping (imports/calls/usages/dataflow/aliases/mutations)
+    * [ ] `edgeStyles` mapping (solid/dashed/dotted/double, arrowheads, labels)
+  * [ ] `nodes`:
+
+    * [ ] file nodes with nested “members” (functions/classes)
+    * [ ] function nodes with structured “semantic facets”
+  * [ ] `edges` (typed, labeled, optionally “port-addressable”)
+* [ ] Schema must support **hierarchical nesting**:
+
+  * [ ] File node has `members[]` with per-member ports
+  * [ ] Member nodes (functions) include `signature`, `modifiers`, `returns`, `controlFlow`, `dataflow`
+* [ ] Determinism requirements:
+
+  * [ ] Stable ordering (sort keys/ids)
+  * [ ] Explicit timestamp field allowed, but everything else must be deterministic
+
+---
+
+### 4.3 Build the semantic “map extractor” (core engine tool)
+
+Implement a Node tool that reads index artifacts and produces the map model.
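+
+As a rough sketch of the intended skeleton (artifact names come from 4.1; the artifact shapes and model fields below are assumptions, not a finished contract):
+
+```js
+// Sketch only: the shapes of repo_map.json and graph_relations.json are
+// assumed here for illustration and must be aligned with the real artifacts.
+import { readFileSync } from 'node:fs';
+import path from 'node:path';
+
+function readArtifact(indexDir, name) {
+  return JSON.parse(readFileSync(path.join(indexDir, name), 'utf8'));
+}
+
+export function buildMapModel(indexDir) {
+  const repoMap = readArtifact(indexDir, 'repo_map.json');
+  const graphs = readArtifact(indexDir, 'graph_relations.json');
+
+  // File nodes with nested members (functions/classes).
+  const nodes = (repoMap.files ?? []).map((file) => ({
+    id: file.path, // fileId = <filePath>
+    kind: 'file',
+    members: (file.symbols ?? []).map((sym) => ({
+      id: `${file.path}::${sym.name}`, // symbolId = <filePath>::<symbolName>
+      signature: sym.signature ?? null,
+      modifiers: sym.modifiers ?? [],
+      returns: sym.returns ?? null,
+    })),
+  }));
+
+  // Typed edges; callGraph entries assumed to be { from, to } symbol ids.
+  const edges = (graphs.callGraph ?? []).map((e) => ({ type: 'call', from: e.from, to: e.to }));
+
+  // Determinism requirement from 4.2: stable ordering; only generatedAt varies.
+  nodes.sort((a, b) => a.id.localeCompare(b.id));
+  edges.sort((a, b) => `${a.from}->${a.to}`.localeCompare(`${b.from}->${b.to}`));
+
+  return { version: 1, generatedAt: new Date().toISOString(), nodes, edges };
+}
+```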
+
+* [ ] Add `tools/code-map.js` (or `tools/report-code-map.js`) that:
+
+  * [ ] Locates repo + index dirs using existing `tools/dict-utils.js`
+  * [ ] Loads:
+
+    * [ ] `file_relations.json`
+    * [ ] `repo_map.json`
+    * [ ] `chunk_meta.json` (or minimal subset)
+    * [ ] `graph_relations.json`
+  * [ ] Merges into a single “map model”:
+
+    * [ ] **Files** classified into categories (drives file shape)
+    * [ ] **Members** extracted per file:
+
+      * [ ] functions/methods/classes (from `repo_map` and/or chunk meta)
+      * [ ] include line ranges
+      * [ ] include `signature`, `modifiers`, `params`, `returns`
+    * [ ] **Function semantics**:
+
+      * [ ] `dataflow.reads`, `dataflow.writes`, `dataflow.mutations`, `dataflow.aliases`
+      * [ ] `controlFlow.branches/loops/returns/throws/awaits/yields/breaks/continues`
+      * [ ] `throws`, `awaits`, `yields`, `returnsValue` facets surfaced explicitly
+    * [ ] **Edges**:
+
+      * [ ] Import edges (file→file) from `importLinks` + raw `imports`
+      * [ ] Export edges (file→symbol) from `exports` + repo_map `exported`
+      * [ ] Call edges (fn→fn) from `callLinks` or `graph_relations.callGraph`
+      * [ ] Usage edges (fn→fn) from `usageLinks` or `graph_relations.usageGraph`
+      * [ ] Dataflow edges:
+
+        * [ ] Argument flow edges from `callSummaries.argMap` (caller→callee param ports)
+        * [ ] Return flow edges using inferred return metadata where available
+        * [ ] Optional: “state flow” edges when reads/writes/mutations overlap (guardrailed; see 4.6)
+      * [ ] Alias edges:
+
+        * [ ] derived from `dataflow.aliases` (function-local or cross-function via calls when resolvable)
+* [ ] Add CLI entrypoint:
+
+  * [ ] `pairofcleats report map` (preferred, consistent with existing `report` group), or
+  * [ ] `pairofcleats map` (top-level)
+* [ ] Support scope + size controls:
+
+  * [ ] `--scope repo|dir|file|symbol`
+  * [ ] `--focus <id>`
+  * [ ] `--include imports,calls,usages,dataflow,exports`
+  * [ ] `--only-exported`
+  * [ ] `--max-files N`, `--max-members-per-file N`, `--max-edges N`
+  * [ ] `--collapse file|dir` (aggregate mode)
+  * [ ] `--format json|dot|svg|html` (see 4.4)
+
+---
+
+### 4.4 Generate “shape-based” diagrams (DOT-first, with nested function fills)
+
+To match your “shape with fill containing functions” requirement cleanly, DOT/Graphviz is the most direct representation.
+
+* [ ] Implement a DOT generator `src/map/dot-writer.js`:
+
+  * [ ] **File nodes as outer shapes** with file-type-dependent shapes:
+
+    * [ ] Source code: `box` or `component`
+    * [ ] Tests: `box` with distinct border style
+    * [ ] Config/data: `cylinder` or `hexagon`
+    * [ ] Docs/prose: `note`
+    * [ ] Generated/build artifacts: `folder` or `box3d`
+  * [ ] **Fill represents members** using HTML-like labels:
+
+    * [ ] Outer `<TABLE>` represents the file “container”
+    * [ ] Each function/class is a row with a `PORT` so edges can land on that member specifically
+    * [ ] **Nested shapes inside the function row** (HTML sub-tables/cells) to represent:
+
+      * [ ] modifiers: async/static/generator/visibility
+      * [ ] signature/params summary
+      * [ ] returns/returnType/returnsValue indicator
+      * [ ] dataflow mini-badges: reads/writes/mutates/aliases counts (and/or top N symbols)
+      * [ ] controlFlow mini-badges: branches/loops/throws/awaits/yields
+* [ ] **Edge encoding** (multiple edge “line types”):
+
+  * [ ] Import edges: dashed file→file
+  * [ ] Call edges: solid function→function (primary control flow)
+  * [ ] Usage edges: thin/secondary style function→function
+  * [ ] Dataflow edges:
+
+    * [ ] dotted caller→callee(param) edges (argument flow)
+    * [ ] dotted callee→caller edges for return flow (if inferred)
+  * [ ] Mutation/state edges (optional, guardrailed): double-line or distinct style
+  * [ ] Alias edges: dashed-dotted, labeled `alias: a=b`
+* [ ] Output modes:
+
+  * [ ] `--format dot` always available
+  * [ ] `--format svg` if Graphviz present (shell out to `dot -Tsvg`)
+  * [ ] `--format html` wraps SVG + legend into a standalone HTML viewer
+* [ ] Implement legend rendering:
+
+  * [ ] Either embed as a DOT subgraph or in HTML wrapper
+  * [ ] Must document shape/edge meaning for users
+
+---
+
+### 4.5 Sublime Text 3 plugin commands for map generation + viewing
+
+Provide first-class UX inside Sublime, even if rendering happens externally.
+
+* [ ] Add commands:
+
+  * [ ] `PairOfCleats: Map (Repo)`
+  * [ ] `PairOfCleats: Map (Current Folder)`
+  * [ ] `PairOfCleats: Map (Current File)`
+  * [ ] `PairOfCleats: Map (Symbol Under Cursor)`
+  * [ ] `PairOfCleats: Map (Selection)`
+* [ ] Add a “Map Type” chooser:
+
+  * [ ] Import Map
+  * [ ] Call Map
+  * [ ] Usage/Dependency Map
+  * [ ] Dataflow Map (args/returns/state)
+  * [ ] Combined Map (guardrailed by size limits)
+* [ ] Implement output handling:
+
+  * [ ] Write outputs to `.pairofcleats/maps/` (repo-local) or cache dir
+  * [ ] Open `.dot` in Sublime for inspection
+  * [ ] If `.svg`/`.html` produced:
+
+    * [ ] Provide “Open in Browser” command (best-effort)
+* [ ] Navigation affordances:
+
+  * [ ] When a map is generated, also produce an indexable “node list” JSON:
+
+    * [ ] allows Sublime quick panel “Jump to node” (file/function)
+    * [ ] opens file at recorded `startLine`
+* [ ] Graceful degradation:
+
+  * [ ] If `astDataflow` / `controlFlow` metadata is unavailable in the index:
+
+    * [ ] show “limited map” warning
+    * [ ] offer action: “Rebuild index with dataflow/control-flow enabled” (invokes `index build` with the project’s config expectations)
+
+---
+
+### 4.6 Performance guardrails + scaling strategy (mandatory for real repos)
+
+This phase will generate *very large graphs* unless explicitly constrained.
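+
+As one illustration of the policy the checklist below describes (the model shape and limit names are assumptions), a deterministic truncation pass might look like:
+
+```js
+// Sketch of a guardrail pass over a file-level map model; assumes edges
+// connect node ids directly (member-level edges would aggregate to files first).
+export function applyGuardrails(model, { maxFiles = 200, maxEdges = 2000 } = {}) {
+  // "Top-K by degree": rank nodes by call/import fan-in/out.
+  const degree = new Map();
+  for (const e of model.edges) {
+    degree.set(e.from, (degree.get(e.from) ?? 0) + 1);
+    degree.set(e.to, (degree.get(e.to) ?? 0) + 1);
+  }
+  const nodes = [...model.nodes]
+    .sort((a, b) =>
+      (degree.get(b.id) ?? 0) - (degree.get(a.id) ?? 0) ||
+      a.id.localeCompare(b.id)) // stable tie-break: same inputs, same output
+    .slice(0, maxFiles);
+
+  const kept = new Set(nodes.map((n) => n.id));
+  const edges = model.edges
+    .filter((e) => kept.has(e.from) && kept.has(e.to))
+    .slice(0, maxEdges);
+
+  // Failure-mode policy: emit a truncation summary instead of failing.
+  const truncation =
+    nodes.length < model.nodes.length || edges.length < model.edges.length
+      ? { droppedNodes: model.nodes.length - nodes.length,
+          droppedEdges: model.edges.length - edges.length }
+      : null;
+  return { ...model, nodes, edges, truncation };
+}
+```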
+ +* [ ] Hard limits with user-overrides: + + * [ ] `maxFiles`, `maxMembersPerFile`, `maxEdges` + * [ ] edge sampling policies per edge type +* [ ] Aggregation modes: + + * [ ] Directory-level aggregation (folder nodes contain files) + * [ ] File-only map (no nested functions) + * [ ] Export-only functions view + * [ ] “Top-K by degree” (highest call/import fan-in/out) +* [ ] Deterministic sampling: + + * [ ] same inputs → same output (stable selection) +* [ ] Cache map builds keyed by: + + * [ ] index signature + generator options +* [ ] Failure mode policy: + + * [ ] If size exceeds limits, output a “truncated map” plus a summary explaining what was dropped + +--- + +### 4.7 Tests (core + integration + determinism) + +Add explicit automated coverage for the map feature. + +#### Node tool tests (authoritative) + +* [ ] `tests/code-map-basic.js` + + * [ ] Build a tiny fixture repo with: + + * [ ] imports/exports + * [ ] functions calling other functions + * [ ] a function with reads/writes/mutations/aliases + * [ ] a function with branches/loops/throws/awaits + * [ ] Run `build_index.js --stub-embeddings` + * [ ] Run `pairofcleats report map --format json` + * [ ] Assert: + + * [ ] file nodes exist + * [ ] member nodes include `signature/modifiers/returns/dataflow/controlFlow` + * [ ] edge sets include imports + calls +* [ ] `tests/code-map-dot.js` + + * [ ] Generate DOT output + * [ ] Assert: + + * [ ] file “container” nodes exist + * [ ] function rows/ports exist + * [ ] edges connect to ports (caller fn → callee fn) + * [ ] distinct edge styles appear for import vs call vs dataflow +* [ ] `tests/code-map-determinism.js` + + * [ ] Run map generation twice and compare outputs (ignore `generatedAt`) +* [ ] `tests/code-map-guardrails.js` + + * [ ] Generate a repo with many dummy functions + * [ ] Ensure truncation behavior is correct and stable + +#### Plugin-side tests + +* [ ] Python unit tests: + + * [ ] command registration exists + * [ ] subprocess args are correct for each map command + * [ ] output paths computed correctly + * [ ] “Graphviz missing” fallback behavior (DOT-only) works + +--- + + +## Phase 5 — Optional: Service-Mode Integration for Sublime (API-backed Workflows) + +*(Renumbered from prior Phase 28; content largely unchanged, but consider adding map endpoints.)* + +### 5.1 Map endpoints (if service mode is adopted) + +* [ ] Extend `api-server` to support: + + * [ ] `GET /map?scope=...&format=...` + * [ ] `GET /map/nodes?filter=...` for quick panels +* [ ] Sublime plugin optionally consumes the API for faster iteration + +### 5.2 Tests + +* [ ] API contract tests for map endpoints +* [ ] Sublime plugin integration tests (mock HTTP server) + +--- + + +## Phase 6 — Distribution Readiness (Package Control + Cross-Platform) + +*(Renumbered from prior Phase 29.)* + +* [ ] Packaging rules for ST3 (no compiled Python deps) +* [ ] Windows/macOS/Linux path + quoting correctness +* [ ] Document Graphviz optional dependency (for SVG/HTML rendering) +* [ ] Provide minimal “DOT-only mode” documentation + +Tests: + +* [ ] `python -m py_compile` over plugin package +* [ ] Cross-platform subprocess quoting tests (Node) + +--- + + +## Phase 7 — Verification Gates (Regression + Parity + UX Acceptance) + +*(Renumbered from prior Phase 30.)* + +* [ ] Parity checklist vs existing extension behaviors (where applicable) +* [ ] Deterministic outputs for map/search commands +* [ ] Performance acceptance criteria (map generation with guardrails) +* [ ] End-to-end smoke suite including: + + * [ ] 
index build + * [ ] search + * [ ] map generation (json + dot) + * [ ] optional svg rendering when Graphviz available + +--- + +### Notes on dependency leverage (aligned to the map phase) + +This map phase is intentionally designed to **maximize reuse** of what the repo already has: + +* Existing semantics extraction already provides the key fields you listed: + + * `imports/exports/usages/importLinks` via relations + * `calls/callDetails` + cross-file `callLinks/usageLinks/callSummaries` + * `signature/modifiers/returns` via docmeta/functionMeta + * `reads/writes/mutations/aliases` via AST dataflow (when enabled) + * `controlFlow` counts already present in docmeta/functionMeta +* Existing graph tooling: + + * `graphology`-backed `graph_relations.json` provides a strong base graph layer +* The missing piece is the **visual model + rendering/export** and **Sublime UX** around it, which Phase 28 supplies. + + +## Phase 8 — Test Gate Stabilization and Determinism + +**Objective:** Make the current test suite reliable (non-flaky) and green, so subsequent refactors (security, caching, RPC hardening) have a trustworthy safety net. + +1. **Fix failing Phase 22 gate: `type-inference-lsp-enrichment` (Python tooling return type missing)** + + * [ ] **Broaden hover fallback conditions in LSP tooling providers so missing return types are recovered even when parameter types are present.** + + * **Why:** All three LSP tooling providers currently only fetch hover when *both* `returnType` is missing *and* `paramTypes` is empty. If a provider can parse param types from `documentSymbol.detail` but that string omits return type (a plausible LSP behavior), it will never attempt hover and will miss return types (exact symptom reported by the failing test). + * **Where:** + + * `src/index/tooling/pyright-provider.js` + + * Current gating (too strict): + `if (!info || (!info.returnType && !Object.keys(info.paramTypes || {}).length)) { ... hover ... }` + * `src/index/tooling/clangd-provider.js` (same pattern) + * `src/index/tooling/sourcekit-provider.js` (same pattern) + * **Fix:** + + * Change hover fallback gating to trigger when **either** return type is missing **or** param types are missing, e.g.: + + * `if (!info || !info.returnType || !Object.keys(info.paramTypes || {}).length) { ... }` + * Keep a small timeout override (already present) and consider a per-file/per-symbol hover cap if you want to prevent worst-case hover storms. + * **Tests:** + + * Keep `tests/type-inference-lsp-enrichment.js` as the regression gate. + * Add/adjust a focused unit/integration test fixture path where `documentSymbol.detail` omits return type but hover includes it (this directly validates the new behavior rather than relying on chance). + * [ ] **Validate stored tooling return types match exact expectations for Python (`str`)** + + * **Why:** The test asserts `entry.type === 'str'` (exact string match). Any normalization differences (e.g., `builtins.str`, `str:`) will fail. + * **Where:** Return type extraction path: + + * `src/index/tooling/signature-parse/python.js` (`parsePythonSignature`) + * `src/index/tooling/pyright-provider.js` (populating `entry.returns`) + * `src/index/type-inference-crossfile/apply.js` (`addInferredReturn`) + * **Fix:** Ensure the Python return type passed into `addInferredReturn()` is the normalized “plain” name the project expects (currently looks intended to already be `str`, but explicitly confirm by tests). + +2. 
**Fix failing Phase 22 gate: `embeddings-dims-mismatch` (test is flaky due to cache file selection)** + + * [ ] **Make the test select a cache entry that matches the identity it intends to mutate.** + + * **Why:** The cache directory can contain *multiple* caches for the same file hash/signature but different identity keys (e.g., stub embeddings default dims 384 from `build_index` stage vs. a subsequent `build-embeddings --dims 8`). The test currently mutates an arbitrary first file returned by `readdir`, which is OS/filesystem-order dependent, causing nondeterministic behavior (observed in `tests/phase22-logs/embeddings-dims-mismatch.js.log`). + * **Where:** `tests/embeddings-dims-mismatch.js` + + * Current behavior: `const targetFile = cacheFiles[0];` (no filtering) + * **Fix (recommended):** + + * Read all cache files, parse JSON, and select one whose `cacheMeta.identity.dims === 8` **and** `cacheMeta.identity.stub === true` (or match `cacheMeta.identityKey` computed from `buildCacheIdentity`). + * Sort `cacheFiles` for determinism even after filtering. + * **Tests:** The test itself is the gate; ensure it passes consistently on Windows/macOS/Linux. + +3. **De-flake related embeddings cache test to prevent future intermittent failures** + + * [ ] Apply the same deterministic cache selection strategy to `tests/embeddings-cache-identity.js`. + + * **Why:** It uses the same “first file” selection pattern and can fail depending on directory enumeration order and presence of other identity caches. + * **Where:** `tests/embeddings-cache-identity.js` + * **Fix:** Filter for identity matching the run’s intended dims/provider/stub flags (same as above), and sort before selecting. + +4. **Add a “Phase 22 gate” smoke runner (optional but strongly recommended)** + + * [ ] Create a single script to run only the gate tests and report failures clearly. + + * **Why:** Reduces time-to-signal and encourages frequent local verification during refactors. + * **Where:** e.g., `tools/run-phase22-gates.js` or `npm run test:phase22` + * **Exit expectation:** One command that deterministically reproduces CI gate results. + +**Exit criteria** + +* [ ] `tests/type-inference-lsp-enrichment.js` passes. +* [ ] `tests/embeddings-dims-mismatch.js` passes deterministically (no filesystem-order dependence). +* [ ] `tests/embeddings-cache-identity.js` passes deterministically. +* [ ] No new flaky tests introduced (verified via at least 5 repeated local runs on one platform, and ideally at least one Windows run). + +--- + + +## Phase 9 — Security and Input-Hardening (Local Servers + Indexing) + +**Objective:** Close high-impact vulnerabilities and unsafe defaults that could be exploited when indexing untrusted repositories or exposing the local API server beyond localhost. + +1. **Prevent symlink-based repo escape during discovery/indexing** + + * [ ] **Stop following symlinks when discovering and stat’ing files.** + + * **Why:** If a repository contains a tracked symlink pointing outside the repo (e.g., to `/etc/passwd`), the current logic can follow it and read/index external files. This is a classic “repo escape / data exfiltration” risk when indexing untrusted repos. + * **Where:** `src/index/build/discover.js` + + * Uses `fs.stat()` (follows symlinks) on each path. + * **Fix:** + + * Use `lstat` first; if it is a symlink: + + * Default behavior: **skip** the entry. + * Optional (configurable) behavior: allow symlinks only if resolved target remains within `rootDir` (realpath boundary check). 
+       * Ensure both “git ls-files” path discovery and fallback `fdir` scanning apply the same symlink policy.
+     * **Tests:**
+
+       * Add a fixture repo containing a symlink file pointing outside repo root.
+       * Assert indexing does not read it (and ideally logs a warning or records a skip reason).
+   * [ ] **Ensure downstream file reads cannot accidentally follow symlinks even if discovery misses one.**
+
+     * **Why:** Defense-in-depth; discovery should prevent it, but a second gate at file-read time reduces risk.
+     * **Where:** `src/index/build/file-processor.js` and any shared read helpers (e.g., `src/shared/encoding.js` `readTextFileWithHash`)
+     * **Fix:** If feasible, check `lstat` before read in the pre-read stage (or pass `lstat` results from discovery and enforce “no symlink reads”).
+
+2. **Lock down API server defaults (CORS, repo selection, and exposure)**
+
+   * [ ] **Remove unconditional permissive CORS (`Access-Control-Allow-Origin: *`) or make it explicitly opt-in.**
+
+     * **Why:** If the server is started with `--host 0.0.0.0` (supported), permissive CORS plus no auth makes it trivial for any web page on the same network to call the API from a browser (cross-site request from an untrusted origin).
+     * **Where (currently sets `*`):**
+
+       * `tools/api/router.js` (sets headers broadly, including metrics endpoint)
+       * `tools/api/response.js`
+       * `tools/api/sse.js`
+     * **Fix (recommended safe default):**
+
+       * Default allowlist: `http://127.0.0.1:*` and `http://localhost:*` only (or no CORS headers at all unless configured).
+       * Add config flags:
+
+         * `api.cors.allowedOrigins` (array)
+         * `api.cors.allowAnyOrigin` (explicit opt-in, default false)
+   * [ ] **Add authentication for non-localhost bindings (or always, with a “dev disable” escape hatch).**
+
+     * **Why:** The API allows expensive operations (search) and can access the filesystem via repo selection (see next item). This should not be anonymous if reachable from other machines.
+     * **Fix:**
+
+       * Support a bearer token header, e.g. `Authorization: Bearer <token>`, with a `PAIROFCLEATS_API_TOKEN` env var.
+       * If `host` is not `127.0.0.1/localhost`, require token by default.
+   * [ ] **Restrict `repoPath` override in API requests (prevent arbitrary filesystem indexing/search).**
+
+     * **Why:** Current API accepts a request body that can set `repoPath`, and then resolves and operates on that directory. Without an allowlist, this is arbitrary directory read/search capability.
+     * **Where:** `tools/api/router.js` `resolveRepo(value)` and usage in `/search`, `/status`, `/stream/search`.
+     * **Fix options:**
+
+       * Option A (strict): disallow `repoPath` in request; only use the server’s configured repo.
+       * Option B (allowlist): allow only if within a configured set of allowed roots (`api.allowedRepoRoots`), enforced by realpath boundary checks.
+     * **Tests:**
+
+       * Confirm requests with disallowed repoPath return 400/403.
+       * Confirm allowed repo paths still work.
+
+3. **Harden API request body parsing and limits**
+
+   * [ ] **Replace string concatenation body parsing with byte-safe buffering and strict size enforcement.**
+
+     * **Why:** Current `parseBody` in `tools/api/router.js` does `data += chunk` and uses `data.length` (characters, not bytes). This is less reliable and can be slower for large payloads due to repeated string reallocations.
+     * **Fix:**
+
+       * Accumulate Buffers in an array; track `byteLength`.
+       * Enforce a hard cap in bytes (e.g., 1 MiB configurable).
+       * Only decode once at the end.
+ * [ ] **Validate `Content-Type` for JSON endpoints.** + + * **Why:** Avoid ambiguous parsing and reduce attack surface. + * **Fix:** Require `application/json` for POST bodies on `/search` and stream endpoints (except where intentionally flexible). + +**Exit criteria** + +* [ ] Indexing does not follow symlinks by default (tested with a symlink fixture). +* [ ] API no longer emits permissive CORS headers by default. +* [ ] API requests cannot arbitrarily set `repoPath` unless explicitly allowed/configured. +* [ ] API body parsing is byte-safe and enforces a clear, tested size limit. + +--- + + +## Phase 10 — RPC Robustness and Memory-Safety (LSP + MCP + JSON-RPC) + +**Objective:** Prevent unbounded memory growth and improve resilience when communicating with external processes (LSP servers, MCP transport), including malformed or oversized JSON-RPC frames. + +1. **Implement `maxBufferBytes` enforcement in framed JSON-RPC parser** + + * [ ] **Enforce `maxBufferBytes` in `createFramedJsonRpcParser`.** + + * **Why:** The function accepts `maxBufferBytes` but does not enforce it, leaving an unbounded buffer growth path if a peer sends large frames or never terminates headers. + * **Where:** `src/shared/jsonrpc.js` (`createFramedJsonRpcParser`) + * **Fix:** + + * Track buffer size after concatenation. + * If buffer exceeds limit: + + * Clear internal buffer. + * Call `onError(new Error(...))`. + * Optionally enter a “failed/closed” state to reject further data. + * Consider separate thresholds: + + * `maxHeaderBytes` (protect header scan) + * `maxMessageBytes` (protect content-length payload) + * [ ] **Add explicit tests for oversized frames.** + + * **Where:** Add a new unit test under `tests/` that pushes > limit into parser and asserts: + + * `onError` called + * parser does not continue to grow memory + +2. **Apply bounded JSON-RPC parsing in LSP client** + + * [ ] Replace `StreamMessageReader` usage with the bounded framed parser (or wrap it with size checks). + + * **Why:** `StreamMessageReader` will buffer messages; without explicit size enforcement at your integration boundary, a misbehaving server can cause OOM. + * **Where:** `src/integrations/tooling/lsp/client.js` + * **Fix:** + + * Wire `proc.stdout` `data` into `createFramedJsonRpcParser`. + * Feed parsed messages into the existing dispatch/response correlation logic. + * Ensure shutdown/kill closes parser cleanly. + +3. **Apply bounded JSON-RPC parsing in MCP transport** + + * [ ] Replace `StreamMessageReader` usage similarly. + + * **Where:** `tools/mcp/transport.js` + * **Fix:** Same pattern as LSP client; enforce message size limits and fail gracefully. + +**Exit criteria** + +* [ ] `createFramedJsonRpcParser` enforces max buffer/message sizes with tests. +* [ ] LSP client no longer relies on unbounded message buffering. +* [ ] MCP transport no longer relies on unbounded message buffering. + +--- + + +## Phase 11 — Resource Lifecycle Management (Caches, Long-Lived Servers, Builds) + +**Objective:** Prevent memory and resource leaks in long-running processes (API server, service workers), especially across repeated builds and multi-repo usage. + +1. **Add eviction/TTL for API router repo-level caches** + + * [ ] **Implement eviction for `repoCaches` map in `tools/api/router.js`.** + + * **Why:** `repoCaches` can grow unbounded if clients query multiple repos or if repo roots vary. Each entry can hold heavy caches (index cache + sqlite connections). 
+ * **Fix:** + + * Add: + + * `maxRepos` (e.g., 3–10) + * `repoTtlMs` (e.g., 10–30 minutes) + * Track `lastUsed` and evict least-recently-used / expired. + * On eviction: close sqlite cache handles (`sqliteCache.close()`), clear index cache. + * [ ] Add metrics for cache size and evictions. + + * **Where:** `tools/api/router.js` and metrics registry. + +2. **Add eviction for per-repo index cache and sqlite DB cache** + + * [ ] **Index cache eviction** + + * **Why:** `src/retrieval/index-cache.js` caches by `dir` (which can change per build). On repeated re-indexing, old build directories can accumulate. + * **Fix:** Convert to LRU with max entries, or TTL purge on access. + * [ ] **SQLite DB cache eviction** + + * **Where:** `src/retrieval/sqlite-cache.js` + * **Why:** Same “dir-per-build” key pattern; can leak connections/handles. + * **Fix:** LRU/TTL + ensure `close()` called on eviction. + +3. **Add explicit cache invalidation when “current build” pointer changes** + + * [ ] Detect when the effective index directory changes (new build) and prune caches for previous builds. + + * **Why:** Keeps hot caches relevant and bounds memory footprint. + +**Exit criteria** + +* [ ] API server memory does not grow unbounded when indexing/searching multiple repos/builds. +* [ ] Old build caches are evicted/pruned automatically. +* [ ] SQLite handles are closed on eviction (verified via tests or instrumentation). + +--- + + +## Phase 12 — Performance and Operational Hardening + +**Objective:** Improve throughput and robustness under load without changing core behavior. + +1. **Reduce event-loop blocking sync filesystem calls on API request paths** + + * [ ] Replace `fsSync.*` in API request hot paths with async equivalents where practical. + + * **Why:** Sync I/O can stall concurrent requests in the API server process. + * **Where (examples):** + + * `tools/api/router.js` `resolveRepo()` uses `existsSync/statSync`. + * **Fix:** Use `fs.promises.stat` with try/catch; cache results briefly if needed. + +2. **Prevent decompression “zip bomb” style memory spikes in artifact reading** + + * [ ] Add output size limiting to gzip decompression. + + * **Why:** `src/shared/artifact-io.js` uses `gunzipSync(buffer)` and only checks decompressed size *after* decompression. A small compressed file could expand massively and spike memory. + * **Fix:** + + * Use `zlib.gunzipSync(buffer, { maxOutputLength: maxBytes + slack })` (if supported in your Node target), or switch to streaming gunzip with explicit byte limits. + * **Where:** `src/shared/artifact-io.js` `parseBuffer` / gzip handling. + +3. **Add download size limits for tools that fetch large remote assets** + + * [ ] Enforce maximum download size (or require hash) for dictionary downloads. + + * **Why:** `tools/download-dicts.js` buffers the entire response in memory (`Buffer.concat`) without a hard cap. + * **Fix:** Stream to disk with a cap; abort if exceeded; strongly prefer requiring hashes for non-default URLs. + +**Exit criteria** + +* [ ] API request path avoids avoidable sync I/O. +* [ ] Artifact gzip parsing cannot explode memory beyond configured limits. +* [ ] Large downloads are bounded and/or verified. + +--- + + +## Phase 13 — Documentation and Configuration Hardening + +**Objective:** Ensure the fixed behavior is discoverable, configurable, and hard to misconfigure into an unsafe state. + +1. 
**Document security posture and safe defaults** + + * [ ] Document: + + * API server host binding risks (`--host 0.0.0.0`) + * CORS policy and how to configure allowed origins + * Auth token configuration (if implemented) + * RepoPath allowlist behavior + * [ ] Add a prominent note: indexing untrusted repos and symlinks policy. + +2. **Add configuration schema coverage for new settings** + + * [ ] If adding config keys (CORS/auth/cache TTL), ensure they are: + + * Reflected in whatever config docs you maintain + * Validated consistently (even if validation is lightweight) + +**Exit criteria** + +* [ ] README/docs reflect new defaults and how to safely expose services. +* [ ] New options are documented and validated enough to prevent silent misconfiguration. + +--- + +--- + + +## Phase 14 — Optional-dependency framework + capability registry (foundation for all phases) + +### 14.1 Introduce a consistent “optional dependency” loader + +* [ ] Add `src/shared/optional-deps.js` with a single, opinionated API: + + * [ ] `tryRequire(name)` / `tryImport(name)` helpers (use `createRequire(import.meta.url)` where needed) + * [ ] Standardized return shape: `{ ok: true, mod } | { ok: false, error, reason }` + * [ ] Standardized logging hook (only when `PAIROFCLEATS_VERBOSE` or a dedicated flag is enabled) +* [ ] Add `src/shared/capabilities.js` that reports runtime availability: + + * [ ] `watcher: { chokidar: true, parcel: boolean }` + * [ ] `regex: { re2: boolean, re2js: true }` + * [ ] `hash: { nodeRsXxhash: boolean, wasmXxhash: true }` + * [ ] `compression: { gzip: true, zstd: boolean }` + * [ ] `extractors: { pdf: boolean, docx: boolean }` + * [ ] `mcp: { sdk: boolean, legacy: true }` + * [ ] `externalBackends: { tantivy: boolean, lancedb: boolean }` (even if “boolean” means “reachable” rather than “installed”) +* [ ] Wire capabilities into existing “status” surfaces: + + * [ ] Extend `tools/mcp/repo.js` → `configStatus()` to include capability info and warnings for requested-but-unavailable features + * [ ] Extend `tools/config-dump.js` (or equivalent) to print capabilities in JSON output mode + +### 14.2 Add config + env “backend selectors” (uniform UX) + +* [ ] Extend `src/shared/env.js` to parse new selectors (string + allowlist): + + * [ ] `PAIROFCLEATS_WATCHER_BACKEND` = `auto|chokidar|parcel` + * [ ] `PAIROFCLEATS_REGEX_ENGINE` = `auto|re2|re2js` + * [ ] `PAIROFCLEATS_XXHASH_BACKEND` = `auto|native|wasm` + * [ ] `PAIROFCLEATS_COMPRESSION` = `auto|gzip|zstd|none` + * [ ] `PAIROFCLEATS_DOC_EXTRACT` = `auto|on|off` + * [ ] `PAIROFCLEATS_MCP_TRANSPORT` = `auto|sdk|legacy` +* [ ] Add parallel config keys in `.pairofcleats.json` (keep them near existing related config blocks): + + * [ ] `indexing.watch.backend` + * [ ] `search.regex.engine` + * [ ] `indexing.hash.backend` + * [ ] `indexing.artifactCompression.mode` enum expansion + `auto` + * [ ] `indexing.documentExtraction.enabled` + * [ ] `mcp.transport` +* [ ] Update `docs/config-schema.json`: + + * [ ] Add/expand enums (avoid “free string” for anything that’s meant to be policy-controlled) + * [ ] Add descriptions that clarify fallback rules (`auto` behavior) +* [ ] Update any config validation code paths if they enforce known keys (`src/config/validate.js` is schema-driven; keep schema authoritative) + +### 14.3 Add dependency-bundle reference stubs (keeps repo documentation consistent) + +For each new dependency introduced in later phases, add a minimal doc file under: +`docs/references/dependency-bundle/deps/.md` + +* [ ] 
`parcel-watcher.md` +* [ ] `re2.md` +* [ ] `node-rs-xxhash.md` +* [ ] `mongodb-js-zstd.md` +* [ ] `pdfjs-dist.md` +* [ ] `mammoth.md` +* [ ] `modelcontextprotocol-sdk.md` +* [ ] `lancedb.md` (if used) +* [ ] `tantivy.md` (if used) +* [ ] Update `docs/references/dependency-bundle/README.md` if it has an index + +### 14.4 Tests (framework-level) + +* [ ] Add `tests/capabilities-report.js`: + + * [ ] Asserts `capabilities` object shape is stable + * [ ] Asserts `auto` selectors never throw when optional deps are missing +* [ ] Add a script-coverage action to run it: + + * [ ] `tests/script-coverage/actions.js`: add action entry that calls `runNode(...)` + * [ ] (Optional) Add an npm script alias if you want parity with the rest of the repo scripts + +**Exit criteria** + +* [ ] All “capability” calls are side-effect-free and safe when optional deps are absent +* [ ] `config_status` (MCP) can surface “you requested X but it’s not available” warnings without crashing +* [ ] CI passes on Node 18 (Ubuntu + Windows lanes) + +--- + + +## Phase 15 — File watching performance: add `@parcel/watcher` backend (keep chokidar fallback) + +### 15.1 Add the dependency (prefer optional unless you want it guaranteed everywhere) + +* [ ] Add `@parcel/watcher` to `package.json` + + * [ ] Prefer `optionalDependencies` if you want installs to succeed even when native builds fail + * [ ] If you add it as a hard dependency, ensure Windows CI remains green + +### 15.2 Create a watcher-backend abstraction + +* [ ] Create `src/index/build/watch/backends/types.js` (or inline JSDoc contract) describing: + + * [ ] `start({ root, ignored, onEvent, onError, pollMs? }) -> { close(): Promise }` + * [ ] Normalized event shape: `{ type: 'add'|'change'|'unlink', absPath }` +* [ ] Extract chokidar wiring out of `src/index/build/watch.js`: + + * [ ] Move into `src/index/build/watch/backends/chokidar.js` + * [ ] Preserve existing semantics (`awaitWriteFinish`, ignored matcher, poll support) +* [ ] Implement parcel watcher backend: + + * [ ] New file: `src/index/build/watch/backends/parcel.js` + * [ ] Map parcel events to the normalized `{type, absPath}` model + * [ ] Decide how to handle rename/move (often appears as unlink+add): + + * [ ] If parcel reports rename, still emit unlink+add for compatibility with current scheduling + * [ ] Implement “poll” behavior: + + * [ ] If poll mode is requested, either: + + * [ ] force chokidar with polling, **or** + * [ ] implement a cheap stat-based poller wrapper (only if needed) + * [ ] Implement “write stability” guard: + + * [ ] Chokidar has `awaitWriteFinish`; parcel does not in the same way + * [ ] Add a “stabilize file” check in the pipeline: before processing a file, optionally confirm `mtime/size` stable across N ms + * [ ] Place this in `createDebouncedScheduler()` or immediately before `enqueueOrUpdate()` in `file-processor.js` (prefer a single shared guard) + +### 15.3 Wire selection into `watchIndex()` + +* [ ] Update `src/index/build/watch.js`: + + * [ ] Choose backend via (in order): CLI/config → env → `auto` capability + * [ ] Log selected backend once at startup (only if verbose or `--watch`) + * [ ] Ensure `pollMs` is still honored (either by backend or by selection logic) + +### 15.4 Tests + +* [ ] Add `tests/watch-backend-selection.js`: + + * [ ] Forces `PAIROFCLEATS_WATCHER_BACKEND=chokidar` and asserts no parcel import occurs + * [ ] Forces `...=parcel` and asserts fallback behavior if module unavailable (no crash, warning path) +* [ ] Add 
`tests/watch-stability-guard.js`: + + * [ ] Simulate “partial write” (write file in two chunks with delay) and assert processor waits/defers correctly + * [ ] Keep the test deterministic: use explicit timeouts and a temp directory under `tests/.cache` +* [ ] Add corresponding script-coverage actions in `tests/script-coverage/actions.js` + +**Exit criteria** + +* [ ] `pairofcleats index watch` remains correct on Windows and Linux +* [ ] No regressions in ignore behavior (still uses `buildIgnoredMatcher`) +* [ ] Event storms do not cause repeated redundant rebuilds (existing debounce logic preserved) + +--- + + +## Phase 16 — Safe regex acceleration: optional native RE2 (`re2`) with `re2js` fallback + +### 16.1 Add dependency + backend wrapper + +* [ ] Add `re2` (native) as an optional dependency (recommended) +* [ ] Refactor `src/shared/safe-regex.js` into a backend-based module: + + * [ ] Keep current behavior as the fallback backend (`re2js`) + * [ ] Add `src/shared/safe-regex/backends/re2.js` + * [ ] Add `src/shared/safe-regex/backends/re2js.js` (wrap existing usage cleanly) +* [ ] Preserve existing safety constraints: + + * [ ] `maxPatternLength` + * [ ] `maxInputLength` + * [ ] Guard flags normalization (only `gimsyu` supported as today) + +### 16.2 Integrate selector + compatibility contract + +* [ ] Add `createSafeRegex({ engine, ...limits })` selection: + + * [ ] `engine=auto` uses `re2` if available else `re2js` + * [ ] `engine=re2` hard-requires native; if missing, returns a clear error (or a warning + fallback if you prefer) +* [ ] Validate behavioral parity: + + * [ ] Ensure `.exec()` and `.test()` match expectations for `g` and non-`g` + * [ ] Ensure `.lastIndex` semantics are either compatible or explicitly *not supported* (and documented) + +### 16.3 Update call sites + +* [ ] Verify these flows still behave correctly: + + * [ ] `src/retrieval/output/filters.js` (file/path filters) + * [ ] `src/retrieval/output/risk-tags.js` (risk tagging) + * [ ] Any structural search / rulepack path using regex constraints + +### 16.4 Tests + +* [ ] Add `tests/safe-regex-engine.js`: + + * [ ] Conformance tests (flags, match groups, global behavior) + * [ ] Safety limit tests (pattern length, input length) + * [ ] Engine-selection tests (`auto`, forced `re2js`) +* [ ] Add script-coverage action(s) + +**Exit criteria** + +* [ ] No user-visible semantic regressions in filtering/risk-tagging +* [ ] “Engine auto” is safe and silent (no noisy logs) unless verbose + +--- + + +## Phase 17 — Hashing performance: optional native xxhash (`@node-rs/xxhash`) with `xxhash-wasm` fallback + +### 17.1 Add dependency + unify backend contract + +* [ ] Add `@node-rs/xxhash` as optional dependency (or hard dep if you accept platform constraints) +* [ ] Create `src/shared/hash/xxhash-backend.js`: + + * [ ] `hash64(buffer|string) -> hex16` (exact output format must match existing `checksumString()` + `checksumFile()`) + * [ ] `hash64Stream(readable) -> hex16` (if supported; otherwise implement chunking in JS) +* [ ] Update `src/shared/hash.js`: + + * [ ] Keep `sha1()` unchanged + * [ ] Route `checksumString()` / `checksumFile()` through the backend contract + * [ ] Preserve deterministic formatting (`formatXxhashHex`) + +### 17.2 Introduce selector + telemetry + +* [ ] Add `PAIROFCLEATS_XXHASH_BACKEND=auto|native|wasm` +* [ ] Emit backend choice in verbose logs (once) + +### 17.3 Tests + +* [ ] Add `tests/xxhash-backends.js`: + + * [ ] Assert `checksumString('abc')` matches a known baseline (record from current 
implementation) + * [ ] Assert `checksumFile()` matches `checksumString()` on same content (via temp file) + * [ ] If native backend is available, assert native and wasm match exactly + * [ ] If native is missing, ensure test still passes (skips “native parity” block) +* [ ] Add script-coverage action(s) + +**Exit criteria** + +* [ ] No change to bundle identity semantics (incremental cache stability) +* [ ] `checksumFile()` remains bounded-memory for large files (streaming or chunked reads) + +--- + + +## Phase 18 — Artifact compression upgrade: add Zstandard (`zstd`) alongside gzip + +### 18.1 Add compression dependency + +* [ ] Add `@mongodb-js/zstd` (recommended as optional dependency due to native bindings) +* [ ] Decide “streaming vs buffer-only” support: + + * [ ] If streaming is supported: implement streaming JSONL writers/readers + * [ ] If buffer-only: restrict zstd to JSON object/array artifacts, keep JSONL as gzip (document clearly) + +### 18.2 Introduce compression abstraction (avoid sprinkling `if (mode===...)` everywhere) + +* [ ] Add `src/shared/compression.js`: + + * [ ] `compressBuffer(mode, buffer, level?)` + * [ ] `decompressBuffer(mode, buffer)` + * [ ] Optional stream helpers if supported +* [ ] Update `src/index/build/artifacts/compression.js`: + + * [ ] Expand `mode` validation: `gzip|zstd|none` + * [ ] Keep current defaults unchanged (`gzip` or `null` based on existing config) +* [ ] Update `src/index/build/artifacts.js`: + + * [ ] Replace hard-coded `.json.gz` with extension derived from compression mode + + * [ ] gzip: `.json.gz` + * [ ] zstd: `.json.zst` (or `.json.zstd`; pick one and standardize) + * [ ] Ensure `compressionKeepRaw` behavior remains correct + +### 18.3 Update readers/writers for new extensions + +* [ ] Update `src/shared/artifact-io.js`: + + * [ ] Extend `resolveArtifactPath()` to check: + + * [ ] `.json` then `.json.gz` then `.json.zst` + * [ ] Also handle `.bak` variants for each + * [ ] Extend `readJsonFile()` to decode zstd when applicable +* [ ] Update `src/shared/json-stream.js`: + + * [ ] Add zstd path for `writeJsonArrayFile()` / `writeJsonObjectFile()` when compression is requested + * [ ] If JSONL is to support zstd: update `writeJsonLinesFile()` and `readJsonLinesArraySync()` + +### 18.4 Update artifact contract + metrics + +* [ ] Update `docs/artifact-contract.md`: + + * [ ] New allowed compression modes + * [ ] New filename extensions + * [ ] Backward compatibility statement (gzip still readable) +* [ ] Update `src/index/build/artifacts/metrics.js` to report `compression.mode=zstd` +* [ ] Update `docs/config-schema.json` to restrict/describe valid modes + +### 18.5 Tests + +* [ ] Add `tests/artifact-zstd-readwrite.js`: + + * [ ] Write a compressed artifact (zstd) using production writer + * [ ] Read it with `readJsonFile()` and assert payload matches +* [ ] Extend `tests/artifact-bak-recovery.js` with a zstd variant: + + * [ ] `.json.zst` + `.bak` fallback behavior +* [ ] Add script-coverage action(s) + +**Exit criteria** + +* [ ] `loadIndex()` can transparently read `.json`, `.json.gz`, and `.json.zst` artifacts +* [ ] Existing gzip artifacts remain fully compatible +* [ ] Failure-mode behavior (`.bak` recovery) remains correct for new extensions + +--- + + +## Phase 19 — Massive functionality boost: PDF + DOCX ingestion (prose mode) + +### 19.1 Add document extraction dependencies + +* [ ] Add `pdfjs-dist` (PDF text extraction) +* [ ] Add `mammoth` (DOCX → text/HTML extraction) + +### 19.2 Introduce “extractor” layer in indexing 
pipeline + +* [ ] Create `src/index/build/extractors/`: + + * [ ] `text.js` (wrap existing `readTextFileWithHash` path) + * [ ] `pdf.js` (buffer → extracted text; include page separators if possible) + * [ ] `docx.js` (buffer → extracted text; preserve headings if possible) + * [ ] `index.js` (select extractor by extension + config) +* [ ] Add a new constant set in `src/index/constants.js`: + + * [ ] `EXTS_EXTRACTABLE_BINARY = new Set(['.pdf', '.docx'])` +* [ ] Add `.pdf` and `.docx` to `EXTS_PROSE` **only if** extraction is enabled (or add them unconditionally but ensure they don’t get skipped) + +### 19.3 Fix binary-skip logic to allow extractable docs + +You must handle both “pre-read” scanning and “post-read” binary checks: + +* [ ] Update `src/index/build/file-scan.js` / `createFileScanner()`: + + * [ ] If `ext` ∈ `EXTS_EXTRACTABLE_BINARY` and extraction enabled: + + * [ ] Do **not** mark as `{ reason: 'binary' }` + * [ ] Still allow minified checks to run when relevant (likely irrelevant for pdf/docx) +* [ ] Update `src/index/build/file-processor/skip.js`: + + * [ ] If `ext` extractable and extraction enabled, do not return `binarySkip` +* [ ] Update `src/index/build/file-processor.js`: + + * [ ] Branch early on `ext`: + + * [ ] For `.pdf`/`.docx`: read buffer → extractor → `text` + * [ ] For all else: existing text decoding path + * [ ] Ensure `hash` still derives from raw bytes (current `sha1(buffer)` behavior is good) + * [ ] Ensure `stats.bytes` is still the raw size for guardrails + +### 19.4 Chunking strategy for extracted docs + +* [ ] Decide on an initial, deterministic chunking approach: + + * [ ] Minimal viable: treat extracted output as prose and let default prose chunking apply + * [ ] Better: add dedicated chunkers: + + * [ ] Add `src/index/chunking/prose/pdf.js` to split by page markers + * [ ] Add `src/index/chunking/prose/docx.js` to split by headings / paragraph blocks +* [ ] Update `src/index/chunking/dispatch.js`: + + * [ ] Map `.pdf` and `.docx` to their chunkers (or prose fallback) + +### 19.5 Search + metadata integration + +* [ ] Ensure extracted docs appear in: + + * [ ] `file_meta.json` (file path + ext) + * [ ] `chunk_meta.*` (chunks with correct file associations) +* [ ] Consider adding a metadata flag for UI filters: + + * [ ] `fileMeta[i].isExtractedDoc = true` (or reuse existing `externalDocs` pattern if appropriate) +* [ ] Verify retrieval filters treat these files correctly (extension/path filters) + +### 19.6 Tests (must include “end-to-end search finds doc content”) + +* [ ] Add fixture files under `tests/fixtures/docs/`: + + * [ ] `sample.pdf` with a known unique phrase + * [ ] `sample.docx` with a known unique phrase +* [ ] Add `tests/pdf-docx-extraction.js`: + + * [ ] Unit-level extraction returns expected text +* [ ] Add `tests/pdf-docx-index-search.js`: + + * [ ] Build prose index for a temp repo that includes the docs + * [ ] Run `search.js --mode prose` and assert the phrases match chunks +* [ ] Add script-coverage action(s) + +**Exit criteria** + +* [ ] PDF/DOCX are no longer silently dropped as “binary” (when enabled) +* [ ] Prose search can retrieve content from these formats reliably +* [ ] No regression to binary detection for non-extractable files + +--- + + +## Phase 20 — MCP server: migrate from custom JSON-RPC plumbing to official MCP SDK (reduce maintenance) + +### 20.1 Add MCP SDK and plan transport layering + +* [ ] Add `@modelcontextprotocol/sdk` dependency +* [ ] Decide migration strategy: + + * [ ] **Option A (recommended):** 
keep `tools/mcp-server.js` as the entrypoint, but implement server via SDK and keep legacy behind a flag + * [ ] Option B: replace legacy entirely (higher risk) + +### 20.2 Implement SDK-based server + +* [ ] Add `src/integrations/mcp/sdk-server.js` (or similar): + + * [ ] Register tools from `src/integrations/mcp/defs.js` + * [ ] Dispatch calls to existing handlers in `tools/mcp/tools.js` (or migrate handlers into `src/` cleanly) + * [ ] Preserve progress notifications semantics expected by `tests/mcp-server.js`: + + * [ ] `notifications/progress` + * [ ] Include `{ tool: 'build_index', phase, message }` fields (match current tests) +* [ ] Update `tools/mcp-server.js`: + + * [ ] If `mcp.transport=legacy` or env forces legacy → use current transport + * [ ] Else → use SDK transport + +### 20.3 Remove or isolate legacy transport surface area + +* [ ] Keep `tools/mcp/transport.js` for now, but: + + * [ ] Move to `tools/mcp/legacy/transport.js` + * [ ] Update imports accordingly + * [ ] Reduce churn risk while you validate parity + +### 20.4 Tests + +* [ ] Ensure these existing tests continue to pass without rewriting expectations unless protocol mandates it: + + * [ ] `tests/mcp-server.js` + * [ ] `tests/mcp-robustness.js` + * [ ] `tests/mcp-schema.js` +* [ ] Add `tests/mcp-transport-selector.js`: + + * [ ] Force `PAIROFCLEATS_MCP_TRANSPORT=legacy` and assert legacy path still works + * [ ] Force `...=sdk` and assert SDK path works +* [ ] Add script-coverage action(s) + +**Exit criteria** + +* [ ] MCP server behavior is unchanged from the client perspective (tool list, outputs, progress events) +* [ ] Maintenance burden reduced: eliminate custom framing/parsing where SDK provides it + +--- + + +## Phase 21 — Tantivy sparse backend (optional, high impact on large repos) + +> This phase is intentionally split into “abstraction first” and “backend integration” to keep risk controlled. 
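+
+For orientation before the checklists below, the provider seam might reduce to something like this (the `search({ query, topN, filters, mode })` contract comes from 21.1; module paths and the selection logic are assumptions):
+
+```js
+// Sketch of sparse-backend selection; the provider modules are hypothetical.
+import { createRequire } from 'node:module';
+const require = createRequire(import.meta.url);
+
+const PROVIDERS = {
+  'sqlite-fts': () => require('./providers/sqlite-fts.js'),
+  'js-bm25': () => require('./providers/js-bm25.js'),
+  tantivy: () => require('./providers/tantivy.js'), // optional, may be absent
+};
+
+export function createSparseProvider(name = 'auto') {
+  // "auto" keeps today's defaults; tantivy stays opt-in via config/--backend.
+  const order = name === 'auto' ? ['sqlite-fts', 'js-bm25'] : [name];
+  for (const key of order) {
+    try {
+      return PROVIDERS[key](); // exports search({ query, topN, filters, mode })
+    } catch {
+      if (name !== 'auto') throw new Error(`sparse backend unavailable: ${key}`);
+    }
+  }
+  throw new Error('no sparse backend available');
+}
+
+// Determinism: every provider must tie-break stably, e.g. score desc, idx asc.
+export const stableOrder = (hits) =>
+  [...hits].sort((a, b) => b.score - a.score || a.idx - b.idx);
+```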
+ +### 21.1 Extract a sparse-retrieval interface + +* [ ] Create `src/retrieval/sparse/`: + + * [ ] `types.js` contract: `search({ query, topN, filters, mode }) -> hits[]` + * [ ] `providers/sqlite-fts.js` wrapper around existing SQLite FTS ranking + * [ ] `providers/js-bm25.js` wrapper around the in-memory BM25 path +* [ ] Update `src/retrieval/pipeline.js` to call the provider rather than direct sqlite/JS branching: + + * [ ] Keep behavior identical as baseline + * [ ] Preserve determinism (stable tie-breaking) + +### 21.2 Implement Tantivy integration (choose one operational model) + +* [ ] Choose packaging model: + + * [ ] **Sidecar model:** `tools/tantivy-server` (Rust) + Node client + * [ ] **Embedded binding:** Node N-API module +* [ ] Add `src/retrieval/sparse/providers/tantivy.js`: + + * [ ] Build query → execute → map results to `{ idx, score }` + * [ ] Support candidate-set filtering if feasible (or document it as a limitation and handle via post-filtering) +* [ ] Add `tools/build-tantivy-index.js`: + + * [ ] Consume existing artifacts (`chunk_meta`, token streams) and build tantivy index on disk + * [ ] Store alongside other indexes (e.g., under repo cache root) + * [ ] Consider incremental updates later; start with full rebuild + +### 21.3 Config + CLI integration + +* [ ] Add config: + + * [ ] `tantivy.enabled` + * [ ] `tantivy.path` (optional override) + * [ ] `tantivy.autoBuild` (optional) +* [ ] Extend backend policy logic (see `src/retrieval/cli/backend-context.js` and backend-policy tests): + + * [ ] Allow `--backend tantivy` (or `--sparse-backend tantivy`) + * [ ] Ensure `auto` fallback behavior remains predictable + +### 21.4 Tests (gated if tantivy isn’t always available in CI) + +* [ ] Add `tests/tantivy-smoke.js`: + + * [ ] Builds tantivy index for `tests/fixtures/sample` + * [ ] Executes a basic query and asserts hits are non-empty +* [ ] Gate it behind env: + + * [ ] `PAIROFCLEATS_TEST_TANTIVY=1` to run + * [ ] Otherwise test exits 0 with “skipped” message (match existing patterns in repo) +* [ ] Add script-coverage action(s) that run it only when env flag is set (or mark as skipped in coverage if you keep strictness) + +**Exit criteria** + +* [ ] Tantivy backend can be enabled without changing default behavior +* [ ] For large repos, sparse retrieval latency is materially improved (benchmarks added in Phase 15) + +--- + + +## Phase 22 — LanceDB vector backend (optional, high impact on ANN scaling) + +### 22.1 Extract a vector-ANN provider interface + +* [ ] Create `src/retrieval/ann/`: + + * [ ] `types.js`: `query({ embedding, topN, candidateSet, mode }) -> hits[]` + * [ ] `providers/sqlite-vec.js` wrapper around `rankVectorAnnSqlite` + * [ ] `providers/hnsw.js` wrapper around `rankHnswIndex` +* [ ] Update `src/retrieval/pipeline.js` to use the provider interface + +### 22.2 Implement LanceDB integration (choose operational model) + +* [ ] Choose packaging model: + + * [ ] Node library integration, **or** + * [ ] Sidecar service (Python) + HTTP +* [ ] Add `src/retrieval/ann/providers/lancedb.js`: + + * [ ] Query by vector and return `{ idx, sim }` + * [ ] Handle filtering: + + * [ ] If LanceDB supports “where id IN (…)” efficiently → push down + * [ ] Otherwise → post-filter and overfetch + +### 22.3 Build tooling for vector index creation + +* [ ] Add `tools/build-lancedb-index.js`: + + * [ ] Ingest `dense_vectors_*` artifacts + * [ ] Store LanceDB table in cache (mode-specific) + * [ ] Validate dims/model compatibility using existing `index_state.json` 
semantics + +### 22.4 Tests (gated) + +* [ ] Add `tests/lancedb-ann-smoke.js`: + + * [ ] Build embeddings (stub) → build lancedb table → run a nearest-neighbor query → assert stable result ordering +* [ ] Gate behind `PAIROFCLEATS_TEST_LANCEDB=1` +* [ ] Add script-coverage action(s) gated similarly + +**Exit criteria** + +* [ ] LanceDB ANN can be enabled without breaking sqlite/hnsw fallbacks +* [ ] Demonstrable memory and/or latency win for ANN retrieval at scale + +--- + + +## Phase 23 — Benchmarks, regression gates, and release hardening (prove the ROI) + +### 23.1 Extend microbench suite (`tools/bench/micro/`) + +* [ ] Add `tools/bench/micro/watch.js`: + + * [ ] Event storm simulation (if feasible) or synthetic scheduler load +* [ ] Add `tools/bench/micro/regex.js`: + + * [ ] Compare `re2js` vs `re2` on representative patterns/inputs +* [ ] Add `tools/bench/micro/hash.js`: + + * [ ] Compare wasm vs native checksum throughput +* [ ] Add `tools/bench/micro/compression.js`: + + * [ ] gzip vs zstd compress/decompress for representative artifact payload sizes +* [ ] Add `tools/bench/micro/extractors.js`: + + * [ ] PDF/DOCX extraction throughput and memory ceiling + +### 23.2 Add “no-regression” assertions where it matters + +* [ ] Add deterministic snapshot tests (lightweight, not full golden files): + + * [ ] Ensure chunk IDs stable across backends + * [ ] Ensure ordering stable under ties +* [ ] Add metrics validation: + + * [ ] `index-*.json` metrics reflect new compression/extractor options correctly + +### 23.3 Documentation + UX polish + +* [ ] Update `README.md`: + + * [ ] Mention PDF/DOCX support and how to enable/disable + * [ ] Mention optional performance backends and how `auto` works +* [ ] Update `docs/external-backends.md` for Tantivy/LanceDB reality (what’s implemented vs planned) +* [ ] Update `docs/mcp-server.md` for SDK migration + +**Exit criteria** + +* [ ] Benchmarks show measurable improvement (and are reproducible) +* [ ] CI remains green on Node 18 + Windows lane +* [ ] New features are discoverable via config docs + `config_status` + +--- + + +## Phase 24 — LibUV threadpool utilization (explicit control + docs + tests) + +**Objective:** Make libuv threadpool sizing an explicit, validated, and observable runtime control so PairOfCleats I/O concurrency scales predictably across platforms and workloads. + +### 24.1 Audit: identify libuv-threadpool-bound hot paths and mismatch points + +* [ ] Audit all high-volume async filesystem call sites (these ultimately depend on libuv threadpool behavior): + + * [ ] `src/index/build/file-processor.js` (notably `runIo(() => fs.stat(...))`, `runIo(() => fs.readFile(...))`) + * [ ] `src/index/build/file-scan.js` (`fs.open`, `handle.read`) + * [ ] `src/index/build/preprocess.js` (file sampling + `countLinesForEntries`) + * [ ] `src/shared/file-stats.js` (stream-based reads for line counting) +* [ ] Audit concurrency derivation points where PairOfCleats may exceed practical libuv parallelism: + + * [ ] `src/shared/threads.js` (`ioConcurrency = ioBase * 4`, cap 32/64) + * [ ] `src/index/build/runtime/workers.js` (`createRuntimeQueues` pending limits) +* [ ] Decide and record the intended precedence rules for threadpool sizing: + + * [ ] Whether PairOfCleats should **respect an already-set `UV_THREADPOOL_SIZE`** (recommended, matching existing `NODE_OPTIONS` behavior where flags aren’t overridden if already present). 
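+
+Why this audit matters, in one standalone demo (not part of the codebase): work that lands on the libuv threadpool serializes beyond the pool size, no matter how many calls are in flight. `pbkdf2` is used here only because it is a well-known threadpool-bound call; async `fs` operations behave the same way.
+
+```js
+// Run twice and compare wall time:
+//   UV_THREADPOOL_SIZE=4  node threadpool-demo.js
+//   UV_THREADPOOL_SIZE=16 node threadpool-demo.js
+// Wall time scales with ceil(TASKS / poolSize), not with TASKS alone.
+const crypto = require('node:crypto');
+
+const TASKS = 32;
+const start = process.hrtime.bigint();
+let pending = TASKS;
+for (let i = 0; i < TASKS; i++) {
+  crypto.pbkdf2('password', 'salt', 200000, 64, 'sha512', (err) => {
+    if (err) throw err;
+    if (--pending === 0) {
+      const ms = Number(process.hrtime.bigint() - start) / 1e6;
+      console.log(`completed ${TASKS} tasks in ${ms.toFixed(0)} ms`);
+    }
+  });
+}
+```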
+ +### 24.2 Add a first-class runtime setting + env override + +* [ ] Add config key (new): + + * [ ] `runtime.uvThreadpoolSize` (number; if unset/invalid => no override) +* [ ] Add env override (new): + + * [ ] `PAIROFCLEATS_UV_THREADPOOL_SIZE` (number; same parsing rules as other numeric env overrides) +* [ ] Implement parsing + precedence: + + * [ ] Update `src/shared/env.js` + + * [ ] Add `uvThreadpoolSize: parseNumber(env.PAIROFCLEATS_UV_THREADPOOL_SIZE)` + * [ ] Update `tools/dict-utils.js` + + * [ ] Extend `getRuntimeConfig(repoRoot, userConfig)` to resolve `uvThreadpoolSize` with precedence: + + * `userConfig.runtime.uvThreadpoolSize` → else `envConfig.uvThreadpoolSize` → else `null` + * [ ] Clamp/normalize: floor to integer; require `> 0`; else `null` + * [ ] Update the function’s return shape and JSDoc: + + * from `{ maxOldSpaceMb, nodeOptions }` + * to `{ maxOldSpaceMb, nodeOptions, uvThreadpoolSize }` + +### 24.3 Propagate `UV_THREADPOOL_SIZE` early enough (launcher + spawned scripts) + +* [ ] Update `bin/pairofcleats.js` (critical path) + + * [ ] In `runScript()`: + + * [ ] Resolve `runtimeConfig` as today. + * [ ] Build child env as an object (don’t pass `process.env` by reference when you need to conditionally add keys). + * [ ] If `runtimeConfig.uvThreadpoolSize` is set and `process.env.UV_THREADPOOL_SIZE` is not set, add: + + * [ ] `UV_THREADPOOL_SIZE = String(runtimeConfig.uvThreadpoolSize)` + * [ ] (Optional) If `--verbose` or `PAIROFCLEATS_VERBOSE`, log a one-liner showing the chosen `UV_THREADPOOL_SIZE` for the child process. +* [ ] Update other scripts that spawn Node subcommands and already apply runtime Node options, so they also carry the threadpool sizing consistently: + + * [ ] `tools/setup.js` (`buildRuntimeEnv()`) + * [ ] `tools/bootstrap.js` (`baseEnv`) + * [ ] `tools/ci-build-artifacts.js` (`baseEnv`) + * [ ] `tools/bench-language-repos.js` (repo child env) + * [ ] `tests/bench.js` (bench child env when spawning search/build steps) + * [ ] `tools/triage/context-pack.js`, `tools/triage/ingest.js` (where `resolveNodeOptions` is used) + * Implementation pattern: wherever you currently do `{ ...process.env, NODE_OPTIONS: resolvedNodeOptions }`, also conditionally set `UV_THREADPOOL_SIZE` from `runtimeConfig.uvThreadpoolSize` if not already present. + +> (Optional refactor, if you want to reduce repetition): add a helper in `tools/dict-utils.js` like `resolveRuntimeEnv(runtimeConfig, baseEnv)` and migrate the call sites above to use it. + +### 24.4 Observability: surface “configured vs effective” values + +* [ ] Update `tools/config-dump.js` + + * [ ] Include in `payload.derived.runtime`: + + * [ ] `uvThreadpoolSize` (configured value from `getRuntimeConfig`) + * [ ] `effectiveUvThreadpoolSize` (from `process.env.UV_THREADPOOL_SIZE` or null/undefined if absent) +* [ ] Add runtime warnings in indexing startup when mismatch is likely: + + * [ ] Update `src/index/build/runtime/workers.js` (in `resolveThreadLimitsConfig`, verbose mode is already supported) + + * [ ] Compute `effectiveUv = Number(process.env.UV_THREADPOOL_SIZE) || null` + * [ ] If `effectiveUv` is set and `ioConcurrency` is materially larger, emit a single warning suggesting alignment. + * [ ] If `effectiveUv` is not set, consider a *non-fatal* hint when `ioConcurrency` is high (e.g., `>= 16`) and `--verbose` is enabled. 
+* [ ] (Services) Emit one-time startup info in long-running modes: + + * [ ] `tools/api-server.js` + * [ ] `tools/indexer-service.js` + * [ ] `tools/mcp-server.js` + * Log: effective `UV_THREADPOOL_SIZE`, and whether it was set by PairOfCleats runtime config or inherited from the environment. + +### 24.5 Documentation updates + +* [ ] Update env overrides doc: + + * [ ] `docs/env-overrides.md` + + * [ ] Add `PAIROFCLEATS_UV_THREADPOOL_SIZE` + * [ ] Explicitly note: libuv threadpool size must be set **before the Node process starts**; PairOfCleats applies it by setting `UV_THREADPOOL_SIZE` in spawned child processes (via `bin/pairofcleats.js` and other tool launchers). +* [ ] Update config docs: + + * [ ] `docs/config-schema.json` add `runtime.uvThreadpoolSize` + * [ ] `docs/config-inventory.md` add `runtime.uvThreadpoolSize (number)` + * [ ] `docs/config-inventory.json` add entry for `runtime.uvThreadpoolSize` +* [ ] Update setup documentation: + + * [ ] `docs/setup.md` add a short “Performance tuning” note: + + * [ ] When indexing large repos or using higher `--threads`, consider setting `runtime.uvThreadpoolSize` (or `PAIROFCLEATS_UV_THREADPOOL_SIZE`) to avoid libuv threadpool becoming the limiting factor. +* [ ] (Optional) Add a benchmark note: + + * [ ] `docs/benchmarks.md` mention that benchmarking runs should control `UV_THREADPOOL_SIZE` for reproducibility. + +### 24.6 Tests: schema validation + env propagation + +* [ ] Update config validation tests: + + * [ ] `tests/config-validate.js` ensure `runtime.uvThreadpoolSize` is accepted by schema validation. +* [ ] Add a focused propagation test: + + * [ ] New: `tests/uv-threadpool-env.js` + + * [ ] Create a temp repo dir with a `.pairofcleats.json` that sets `runtime.uvThreadpoolSize`. + * [ ] Run: `node bin/pairofcleats.js config dump --json --repo ` + * [ ] Assert: + + * `payload.derived.runtime.uvThreadpoolSize` matches the config + * `payload.derived.runtime.effectiveUvThreadpoolSize` matches the propagated env (or check `process.env.UV_THREADPOOL_SIZE` if you expose it directly in the dump) +* [ ] Add a non-override semantics test (if that’s the decided rule): + + * [ ] New: `tests/uv-threadpool-no-override.js` + + * [ ] Set parent env `UV_THREADPOOL_SIZE=…` + * [ ] Also set config `runtime.uvThreadpoolSize` to a different value + * [ ] Assert child sees the parent value (i.e., wrapper respects existing env) + +**Exit criteria** + +* [ ] `runtime.uvThreadpoolSize` is in schema + inventory and validated by `tools/validate-config.js`. +* [ ] `pairofcleats …` launches propagate `UV_THREADPOOL_SIZE` to child processes when configured. +* [ ] Users can confirm configured/effective behavior via `pairofcleats config dump --json`. +* [ ] Docs clearly explain when and how the setting applies. + +--- + + +## Phase 25 — Threadpool-aware I/O scheduling guardrails + +**Objective:** Reduce misconfiguration risk by aligning PairOfCleats internal I/O scheduling with the effective libuv threadpool size and preventing runaway pending I/O buildup. + +### 25.1 Add a “threadpool-aware” cap option for I/O queue sizing + +* [ ] Add config (optional, but recommended if you want safer defaults): + + * [ ] `indexing.ioConcurrencyCap` (number) **or** `runtime.ioConcurrencyCap` (number) + * Choose the namespace based on your ownership map (`docs/config-inventory-notes.md` suggests runtime is `tools/dict-utils.js`, indexing is build runtime). 
+* [ ] Implement in: + + * [ ] `src/shared/threads.js` (preferred, because it’s the canonical concurrency resolver) + + * [ ] After computing `ioConcurrency`, apply: + + * `ioConcurrency = min(ioConcurrency, ioConcurrencyCap)` when configured + * (Optional) `ioConcurrency = min(ioConcurrency, effectiveUvThreadpoolSize)` when a new boolean is enabled, e.g. `runtime.threadpoolAwareIo === true` + * [ ] `src/index/build/runtime/workers.js` + + * [ ] Adjust `maxIoPending` to scale from the *final* `ioConcurrency`, not the pre-cap value. + +### 25.2 Split “filesystem I/O” from “process I/O” (optional, higher impact) + +If profiling shows git/tool subprocess work is being unnecessarily throttled by a threadpool-aware cap: + +* [ ] Update `src/shared/concurrency.js` to support two queues: + + * [ ] `fs` queue (bounded by threadpool sizing) + * [ ] `proc` queue (bounded separately) +* [ ] Update call sites: + + * [ ] `src/index/build/file-processor.js` + + * [ ] Use `fsQueue` for `fs.stat`, `fs.readFile`, `fs.open` + * [ ] Use `procQueue` for `getGitMetaForFile` (and any other spawn-heavy steps) + * [ ] `src/index/build/runtime/workers.js` and `src/index/build/indexer/steps/process-files.js` + + * [ ] Wire new queues into runtime and shard runtime creation. + +### 25.3 Tests + benchmarks + +* [ ] Add tests that validate: + + * [ ] Caps are applied deterministically + * [ ] Pending limits remain bounded + * [ ] No deadlocks when both queues exist +* [ ] Update or add a micro-benchmark to show: + + * [ ] Throughput difference when `UV_THREADPOOL_SIZE` and internal `ioConcurrency` are aligned vs misaligned. + +**Exit criteria** + +* [ ] Internal I/O concurrency cannot silently exceed intended caps. +* [ ] No regression in incremental/watch mode stability. +* [ ] Benchmarks show either improved throughput or reduced memory/queue pressure (ideally both). + +--- + + +## Phase 26 — (Conditional) Native LibUV work: only if profiling proves a real gap + +**Objective:** Only pursue *direct* libuv usage (via a native addon) if profiling demonstrates a material bottleneck that cannot be addressed through configuration and queue hygiene. + +### 26.1 Profiling gate and decision record + +* [ ] Add a short profiling harness / guidance doc: + + * [ ] `docs/perf-profiling.md` (new) describing how to profile indexing (CPU + I/O wait) and what thresholds justify native work. +* [ ] Establish decision criteria (example): + + * [ ] If ≥20–30% wall time is spent in JS-level file scanning/reading overhead beyond disk throughput limits, consider native. + * [ ] Otherwise, stay in JS + threadpool tuning. + +### 26.2 Prototype native module (N-API) using libuv for a specific hot path + +* [ ] Only target one narrow, measurable function (examples): + + * [ ] Fast “sample read + binary/minified detection” replacing parts of `src/index/build/file-scan.js` + * [ ] Batched `stat + read` pipeline for small files +* [ ] Provide a clean fallback path to existing JS implementation. +* [ ] Add CI coverage for: + + * [ ] Linux/macOS/Windows builds (or prebuilds) + * [ ] ABI compatibility across supported Node versions + +### 26.3 Packaging and docs + +* [ ] Update: + + * [ ] `package.json` optionalDependencies/build tooling (node-gyp/prebuildify/etc.) + * [ ] `docs/setup.md` to explain native build requirements/fallback behavior + +**Exit criteria** + +* [ ] Prototype demonstrates measurable improvement on representative repos. 
+* [ ] Install friction and cross-platform maintenance cost are explicitly accepted (or the work is abandoned).
+
+#### Bottom line
+
+* **Do not add libuv directly** to this Node codebase.
+* **Do add explicit support for libuv threadpool sizing** (via `UV_THREADPOOL_SIZE`): the current concurrency model (notably `ioConcurrency` up to 64) strongly suggests you will otherwise hit an invisible throughput ceiling.
+
+---
+
+
+## Phase 27 — File processing & artifact assembly (chunk payloads/writers/shards)
+
+**Reviewed snapshot:** `PairOfCleats-main` (zip import)
+**Scope driver:** `pairofcleats_review_section_3_files_and_checklist.md` (Section 3)
+**Review date:** 2026-01-12
+
+### Severity / priority scale
+
+- **P0** — correctness, broken reads, data loss/corruption, or contract violations that can invalidate an index
+- **P1** — determinism/stability, significant performance regressions, security/CI risks, or high-maintenance debt
+- **P2** — cleanup, minor performance wins, refactors, and documentation improvements
+
+---
+
+## Executive summary
+
+### P0 (must address)
+
+- **Chunk-meta sharding cleanup bug can cause the loader to read stale shard data** when a build switches from sharded chunk-meta to non-sharded JSONL: `loadChunkMeta()` prefers `chunk_meta.meta.json` / `chunk_meta.parts` over `chunk_meta.jsonl`, and the current cleanup logic does not remove the sharded artifacts on the “jsonl, not sharded” path.
+  - Impact: **incorrect chunks, incorrect file mapping, confusing debug output, and potentially broken search** for any repo where a previous build produced `chunk_meta.meta.json` / `chunk_meta.parts`.
+  - Primary locus: `src/index/build/artifacts/writers/chunk-meta.js`.
+
+- **Fast import scanning likely mis-parses `es-module-lexer` records** by treating `entry.d` as a module specifier string. In `es-module-lexer`, `d` is not a specifier (it is typically a numeric “dynamic import” marker). This can yield non-string imports (numbers), downstream crashes in normalization, and/or incorrect `fileRelations.imports` / `externalDocs`.
+  - Primary locus: `src/index/build/imports.js`.
+
+- **Piece assembly can silently accept structurally invalid inputs** because `validateLengths()` treats an empty list as “valid” even when the expected length is non-zero. This can produce assembled indexes with mismatched arrays (e.g., `docLengths`, embedding vectors) without an early, actionable error.
+  - Primary locus: `src/index/build/piece-assembly.js`.
+
+- **Piece assembly appears to drop the `comment` field in field postings/docLengths** (field tokens include `comment`, but assembly only merges `name/signature/doc/body`). If `comment` is enabled in fielded search, this can corrupt or disable that feature in assembled outputs.
+  - Primary locus: `src/index/build/piece-assembly.js` (and, secondarily, `src/index/build/postings.js` conventions).
+
+### P1 (high-value next)
+
+- **Determinism risks** (import link ordering; vocab ordering derived from `Map` insertion order; shard batch sorting ties; repo-map ordering) can cause noisy diffs and unstable IDs across builds even when inputs are unchanged.
+- **Artifact manifest robustness**: `pieces/manifest.json` generation can silently record `null` checksums/bytes on error; this weakens contract guarantees and can hide partial artifact failures.
+- **CI metadata hygiene**: `tools/ci-build-artifacts.js` records remote URLs; sanitize them to avoid leaking credentials in CI logs/artifacts.
+ +### P2 (cleanup / maintainability) + +- Documentation drift (notably the claim that compressed payloads embed a `compression` field) and contract documentation gaps (assembled stage semantics, meta schema examples) should be corrected. +- Several low-risk performance wins are available (avoid `split('\n')` in hot paths; reduce repeated per-chunk work; minimize transient array concat). + +--- + +## 27.1 Per-file processing correctness (Checklist A) + +**Audit** + +Reviewed the per-file pipeline as implemented in: + +- `src/index/build/file-processor.js` +- `src/index/build/file-processor/*` (assemble/cached-bundle/chunk/incremental/meta/read/relations/skip/timings) +- Supporting callsites and artifacts emitted downstream: `src/index/build/artifacts.js`, `src/index/build/artifacts/file-meta.js`, and chunk-meta serialization (`src/index/build/artifacts/writers/chunk-meta.js`) +- Relevant tests in scope: `tests/file-processor/skip.test.js`, `tests/file-processor/cached-bundle.test.js` + +Key pipeline stages observed: + +1. Resolve file identity (`abs`, `relKey`) and caps → early skip checks +2. Load cached bundle (incremental) when enabled +3. Read + decode file; hash +4. Language context (registry), segment discovery, chunking +5. Comments extraction (optional) → comment-to-chunk assignment +6. Relations, docmeta, flow/meta enrichment (code mode) +7. Tokenization (main thread or worker), minhash, phrase/chargram sources +8. Embeddings attach (optional) +9. Assemble final chunk payloads + per-file relations → persist incremental bundle + +**Gaps / issues** + +#### Offsets: define and test offset units (byte vs. UTF-16 index) + +- `start` / `end` offsets are produced and consumed as **JavaScript string indices** (UTF‑16 code units) throughout the file pipeline (`text.slice(c.start, c.end)` etc.). +- The checklist explicitly calls out **byte offsets**. Current docs/contracts do not define the unit for `start`/`end`, which leaves room for misinterpretation and subtle bugs for non‑ASCII content. + +**Why it matters** +- If any consumer assumes byte offsets (e.g., a non-JS reader, a tool that indexes into raw file bytes), chunks will be mis-sliced for multi-byte UTF‑8 sequences. + +**Where to address** +- Primary: `src/index/build/file-processor.js` and `src/index/build/artifacts/writers/chunk-meta.js` (and docs under `docs/`). + +#### Chunk boundary invariants are not asserted at the file-processor boundary + +- `file-processor.js` assumes `chunkSegments()` returns non-overlapping, in-range chunks. It does not assert invariants such as: + - `0 <= start <= end <= text.length` + - monotonically increasing chunk ranges (or “overlap only when configured”) + - “no accidental overlap” beyond configured overlap window +- This makes debugging chunking regressions harder: errors will surface downstream (postings build, artifact read) rather than at the boundary. + +#### Skip reasons: observable coverage is incomplete + +Covered / explicit: +- `oversize` (max bytes / max lines), `minified`, `binary`, `read-failure` (and `unreadable` via scan results) + +Missing or ambiguous: +- **unsupported language** (no explicit skip reason visible in `file-processor.js` / `skip.js`) +- **parse / relation extraction failures**: most errors will currently throw and likely fail the build rather than record a per-file skip reason (no “parse-error” skip). + +#### Provenance: per-file outputs are missing stable “content identity” fields + +- Chunk payloads contain `file` (rel path), `ext`, and `lang`, which is good. 
+- `file_meta.json` contains `id`, `file`, `ext`, git metadata, etc. +- **Neither chunk meta nor file meta currently records a stable file content hash** (even though the pipeline already computes `fileHash` for incremental caching). + +This makes post-hoc debugging harder: +- You cannot quickly tell whether a chunk came from a particular file revision without recomputing hashes from source. + +#### Minor correctness nits + +- Comment assignment edge: comments starting exactly at `chunk.end` can be assigned to the previous chunk due to a strict `<` comparison in `assignCommentsToChunks()` (`src/index/build/file-processor/chunk.js`). +- Timing accounting: `addParseDuration()` is invoked multiple times per file (parseStart and relationStart paths), which risks double-counting in aggregated metrics. + +**Remaining work** + +- [ ] **Document offset units** for `start`/`end` (recommendation: define as UTF‑16 code-unit offsets, because that is what JS uses), and add at least one non‑ASCII regression test that validates: + - [ ] `text.slice(start, end)` reproduces the chunk text + - [ ] `offsetToLine()` aligns with `startLine/endLine` for multi-byte characters + (Files: `src/index/build/file-processor.js`, `docs/artifact-contract.md`, `docs/contracts/indexing.md`, plus a new/extended test) + +- [ ] Add **boundary asserts** (behind a dev/test flag if needed) after chunking: + - [ ] in-range checks (`0..text.length`) + - [ ] monotonic chunk ordering + - [ ] overlap detection (only allow configured overlap) + (File: `src/index/build/file-processor.js`) + +- [ ] Make **unsupported-language** behavior explicit and test-covered: + - [ ] decide: skip with reason `unsupported-language` vs. treat as `unknown` with generic chunking + - [ ] add test coverage for the chosen behavior + (Files: `src/index/build/file-processor.js`, `src/index/build/file-processor/skip.js`, tests under `tests/file-processor/`) + +- [ ] Add **parse-error** (and relation-error) per-file skip handling: + - [ ] catch and record failures from `lang.chunk`, `lang.buildRelations`, `lang.extractDocMeta`, `flow()`, etc. 
+ - [ ] ensure the build can proceed when a single file fails (configurable) + (File: `src/index/build/file-processor.js`) + +- [ ] Add **file-level content hash** to `file_meta.json` (and optionally, to each chunk’s `metaV2`): + - [ ] store `hash` and `hashAlgo` + - [ ] ensure incremental and non-incremental builds agree + (Files: `src/index/build/file-processor.js`, `src/index/build/artifacts/file-meta.js`, `docs/artifact-contract.md`) + +- [ ] Fix the comment boundary condition in `assignCommentsToChunks()`: + - [ ] consider `<=` for boundary tests, or implement overlap-based assignment using comment `(start,end)` + (File: `src/index/build/file-processor/chunk.js`) + +- [ ] Audit and correct **timing double-counting** in `createTimingsTracker()` usage: + - [ ] ensure parseMs reflects one pass, and relation/flow have separate counters if desired + (Files: `src/index/build/file-processor.js`, `src/index/build/file-processor/timings.js`) + +--- + +## 27.2 Artifact contract correctness (Checklist B) + +**Audit** + +Reviewed artifact write orchestration and contract touchpoints: + +- Orchestration: `src/index/build/artifacts.js` +- Contract-level helpers: `src/index/build/artifacts/checksums.js`, `src/index/build/artifacts/compression.js` +- Writers: `src/index/build/artifacts/writers/chunk-meta.js`, `.../file-relations.js`, `.../repo-map.js` +- Schema docs: `docs/artifact-contract.md`, `docs/contracts/indexing.md` +- Guardrail tests: `tests/artifact-size-guardrails.js`, `tests/artifact-formats.js`, `tests/artifact-bak-recovery.js` + +Confirmed: +- JSON and JSONL writers use `atomic: true` (temp + rename + `.bak` semantics) via shared JSON stream helpers. +- `pieces/manifest.json` is generated and includes checksums for files that can be read at generation time. +- Readers are designed to be backward compatible with older shapes (e.g., token shard files and meta shapes in `tests/artifact-formats.js`). + +**Gaps / issues** + +#### P0: Chunk-meta sharding cleanup is incomplete (stale shards override new JSONL) + +- In `enqueueChunkMetaArtifacts()` (`src/index/build/artifacts/writers/chunk-meta.js`): + - When `chunkMetaUseJsonl === true` and `chunkMetaUseShards === false`, the writer removes `chunk_meta.json` and `chunk_meta.json.gz`, but **does not remove**: + - `chunk_meta.meta.json` + - `chunk_meta.parts/` +- `loadChunkMeta()` prefers meta/parts if they exist, even if `chunk_meta.jsonl` exists. Therefore, stale shards can override a newly-written JSONL file. + +#### Sharded directory atomicity remains “best effort” only + +- Token postings shards: `artifacts.js` deletes and recreates `token_postings.shards/` and writes part files atomically, but the directory as a whole can still be left in a partial state if the process crashes mid-write (no staging directory + atomic rename). +- Chunk meta shards: similar; additionally, the parts directory is not cleared before writing, which can leave orphan part files. + +This is not always fatal if readers rely solely on `meta.parts`, but it violates the “no partially-written states” intent of the checklist. + +#### Manifest robustness: checksum/stat errors are swallowed + +- `writePiecesManifest()` catches errors from `fs.stat` and `checksumFile` and records `bytes: null` / `checksum: null`, without failing the build or preserving error details. +- That makes it easy to produce an apparently “valid” manifest that cannot be validated later. 
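+
+A sketch of the stricter shape proposed in the remaining-work list below (helper names assumed; the real code path lives in `src/index/build/artifacts/checksums.js`): required pieces fail loudly, optional pieces carry an explicit `error` field instead of silent nulls.
+
+```js
+// Sketch, not the current implementation. readFile keeps it short; a real
+// version would stream large artifacts through the hash.
+const fs = require('node:fs/promises');
+const crypto = require('node:crypto');
+
+async function checksumFile(path) {
+  const buf = await fs.readFile(path);
+  return crypto.createHash('sha256').update(buf).digest('hex');
+}
+
+async function manifestEntry(path, { required = false } = {}) {
+  try {
+    const stat = await fs.stat(path);
+    return { path, bytes: stat.size, checksum: await checksumFile(path) };
+  } catch (err) {
+    if (required) {
+      throw new Error(`manifest: required piece unreadable: ${path}: ${err.message}`);
+    }
+    // Validators should treat an entry with `error` as failed,
+    // never as "unknown but probably fine".
+    return { path, bytes: null, checksum: null, error: String((err && err.code) || err) };
+  }
+}
+```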
+ +#### Documentation drift: compression description is inaccurate + +- `docs/artifact-contract.md` claims the JSON payload contains a `compression` field when `.json.gz` is written. Current writers compress the raw JSON stream; they do not inject a `compression` field into the JSON object. + +#### Contract clarity gaps + +- The docs do not clearly document: + - precedence rules when multiple formats are present (meta/parts vs jsonl vs json) + - the on-disk schema for `token_postings.meta.json` and `chunk_meta.meta.json` (fields vs arrays vs legacy) + - whether `.json.gz` is a sidecar (both present) or a replacement (only gz present) + +**Remaining work** + +- [ ] **Fix chunk-meta cleanup** when `chunkMetaUseJsonl && !chunkMetaUseShards`: + - [ ] remove `chunk_meta.meta.json` if present + - [ ] remove `chunk_meta.parts/` if present + (File: `src/index/build/artifacts/writers/chunk-meta.js`) + +- [ ] Ensure shard writes do not accumulate orphan files: + - [ ] delete `chunk_meta.parts/` before writing new sharded parts (or write to staging dir + rename) + - [ ] confirm `token_postings.shards/` cleanup is complete on all branches + (Files: `src/index/build/artifacts/writers/chunk-meta.js`, `src/index/build/artifacts.js`) + +- [ ] Implement **directory-level atomicity** for sharded artifacts: + - [ ] write shards to `*.tmp/` directory + - [ ] atomically swap into place via rename (and optionally keep a directory-level `.bak`) + (Files: `src/index/build/artifacts/writers/chunk-meta.js`, `src/index/build/artifacts.js`) + +- [ ] Make manifest generation strict for required artifacts: + - [ ] either (a) fail the build on checksum/stat failure, or (b) record an `error` field and ensure validation tooling treats it as failure + (File: `src/index/build/artifacts/checksums.js`) + +- [ ] Update docs to match implementation: + - [ ] remove/adjust claim about `compression` field + - [ ] add schema examples for meta files (fields/arrays/legacy) + - [ ] document precedence rules for readers + (Files: `docs/artifact-contract.md`, `docs/contracts/indexing.md`) + +- [ ] Add a regression test that explicitly covers the stale chunk-meta shard override: + - [ ] build A: sharded chunk meta written + - [ ] build B: non-sharded jsonl written, ensure shards removed or ignored + - [ ] loader reads build B’s jsonl, not build A’s shards + (New test; or extend `tests/artifact-formats.js` / `tests/artifact-size-guardrails.js`) + +--- + +## 27.3 Sharding / pieces / postings (Checklist C) + +**Audit** + +Reviewed: + +- Shard planning: `src/index/build/shards.js` + tests (`tests/shard-plan.js`) +- Postings build: `src/index/build/postings.js` +- Tokenization primitives: `src/index/build/tokenization.js` + buffering tests (`tests/tokenization-buffering.js`) +- Piece assembly/merge: `src/index/build/piece-assembly.js` + test (`tests/piece-assembly.js`) +- Piece compaction tool: `tools/compact-pieces.js` + +**Gaps / issues** + +#### Determinism: import links and vocab ordering are under-specified + +- **Imports / importLinks**: + - `scanImports()` runs with concurrency and stores per-module Sets of importing files. The final arrays are not sorted. + - `buildImportLinksFromRelations()` builds `importLinks` lists that may include the current file and are not explicitly sorted/deduped. + - Result: output can vary based on processing order, which can vary with concurrency and scheduling. + +- **Vocab ordering**: + - `buildPostings()` converts multiple Maps to vocab arrays via `Array.from(map.keys())`. 
+ - This relies on Map insertion order being stable across builds. It often is, but it is not a strong contract and can be perturbed by changes in traversal order or parallelism. + - Risk: **token IDs may shift across builds** even when inputs are unchanged, creating noisy diffs and complicating caching. + +#### Postings canonicalization: sorted/canonical postings are assumed but not asserted + +- Many consumers assume postings are in docId order and token vocab order is stable. +- There is no explicit “canonicalize and validate” step before writing postings, and few tests assert canonical ordering. + +#### Piece assembly: field postings coverage mismatch + weak validation + +- **Field postings merge omits the `comment` field** (see P0 summary). +- **validateLengths()** can silently allow missing arrays when expected > 0 (see P0 summary). +- Vocab arrays in assembly are also derived from Map insertion order; if input order differs, assembled token IDs can differ. + +#### Shard planning: tie-break determinism should be explicit + +- Some sorts are deterministic (by label, by relPath), but shard batching uses weight-based partitioning without explicit tie-breakers when weights are equal. This is likely stable in current Node versions, but should be explicitly stable to avoid cross-version drift. + +**Remaining work** + +#### Shard planning + +- [ ] Add explicit tie-breakers in shard batching and balancing when weights are equal: + - [ ] include `label` or `id` in comparator + - [ ] document determinism guarantees + (File: `src/index/build/shards.js`) + +- [ ] Add a “very large repo” synthetic shard-plan test: + - [ ] verifies bounded memory and time + - [ ] verifies stable shard labels/IDs across runs + (New test; extend `tests/shard-plan.js`) + +#### Postings / tokenization + +- [ ] Canonicalize vocab ordering for stability: + - [ ] define canonical sort order (lexicographic; or localeCompare with explicit locale; or bytewise) + - [ ] apply consistently to token vocab, phrase vocab, chargram vocab, and field vocabs + (File: `src/index/build/postings.js` and any upstream postings-map builders) + +- [ ] Canonicalize and/or validate postings ordering: + - [ ] assert postings doc IDs are strictly increasing per token (or stable canonical order) + - [ ] assert vocab/postings arrays align and lengths match + (File: `src/index/build/postings.js`; plus tests) + +- [ ] Expand quantization tests to include: + - [ ] scale correctness + - [ ] dims mismatch handling + - [ ] doc/code embeddings “fallback to main embedding” behavior + (File: `tests/postings-quantize.js`) + +#### Piece assembly + +- [ ] Fix `validateLengths()` to fail when expected > 0 and list is empty or mismatched: + - [ ] treat `[]` as invalid when `expected > 0` + - [ ] include artifact name + input dir in error message for fast triage + (File: `src/index/build/piece-assembly.js`) + +- [ ] Merge **all field postings present in inputs**, including `comment` (and any future fields): + - [ ] do not hardcode `name/signature/doc/body` + - [ ] merge based on keys present in `field_postings.json` / `field_tokens.json` or config + (File: `src/index/build/piece-assembly.js`) + +- [ ] Determinize assembly: + - [ ] sort `inputs` deterministically by path (or require stable input ordering and document it) + - [ ] sort merged vocabs (or guarantee stable order via canonicalization) + - [ ] ensure assembled output is byte-for-byte stable for same inputs + (Files: `tools/assemble-pieces.js`, `src/index/build/piece-assembly.js`) + +- [ ] Add a 
regression test: **assembled output equals monolithic output** for the same fixture: + - [ ] build monolithic index + - [ ] build two partial indexes (or reuse shards) and assemble + - [ ] compare chunk_meta + token_postings + manifest semantics + (New test; extend `tests/piece-assembly.js`) + +- [ ] Verify manifests list all required parts: + - [ ] ensure meta files are included and checksummed + - [ ] ensure shard part counts match meta.parts and manifest counts match meta totals + (Files: `src/index/build/artifacts/checksums.js`, tests) + +--- + +## 27.4 Performance improvements to prioritize (Checklist D) + +**Audit** + +The current implementation is functional and reasonably structured, but several areas will become dominant costs on large repos: + +- Per-file pipeline does multiple passes over the same data (chunking, tokenization, docmeta, lint/complexity). +- Artifact writing constructs full in-memory arrays for potentially huge artifacts and then serializes them. +- Some hot paths allocate transient arrays aggressively. + +### High-impact improvements (prioritized) + +#### Avoid “build huge arrays then serialize” + +- `buildPostings()` currently materializes large `vocab` and `postings` arrays in memory. + - [ ] Add a streaming/sharded writer path that writes postings shards incrementally as postings are built (or at least allows releasing intermediate Maps earlier). +- `chunk_meta` estimation uses JSON.stringify samples, which is OK, but writing sharded JSONL still relies on iterators that materialize per-entry objects. + - [ ] Consider a “lightweight entry view” or direct JSONL streaming that avoids building large intermediate objects for fields not needed. + +#### Reduce repeated parsing/enrichment passes + +- Complexity + lint are computed in the per-chunk loop but cached per file; move the computation to a single per-file pre-pass to remove repeated cache checks. +- Where feasible, consider combining: + - chunking + tokenization (tokenize the chunk as soon as you slice it, but avoid repeated slice work) + - relations/docmeta extraction caching to avoid per-chunk repeated derived work + +#### Minimize transient allocations + +- Avoid `text.split('\n')` for context windows in `file-processor.js`. Use a line-scan utility that slices the relevant ranges without splitting the entire file. +- Replace repeated `array.concat()` in loops (e.g., `commentFieldTokens = commentFieldTokens.concat(tokens)`) with `push(...tokens)` or manual push for large arrays. +- In tokenization, buffer reuse is good, but `buildTokenSequence()` still clones arrays (`slice()`) each call. Confirm this is intentional and consider: + - pre-sizing output arrays when token counts are known/estimable + - returning typed arrays for `seq` where possible (if consumers permit) + +**Remaining work** + +- [ ] Replace `split('\n')` usage in `src/index/build/file-processor.js` with a targeted line-scan helper. +- [ ] Move complexity/lint computation outside the per-chunk loop in `file-processor.js`. +- [ ] Reduce transient array concatenations in comment token aggregation. +- [ ] Explore a streaming postings writer for very large repos (phase-level refactor). 
+- [ ] Add at least one micro-benchmark or perf regression test covering: + - piece assembly (`src/index/build/piece-assembly.js`) + - piece compaction (`tools/compact-pieces.js`) + +--- + +## 27.5 Refactoring goals (Checklist E) + +**Audit** + +Current state: +- Artifact writing is orchestrated from `artifacts.js` via `enqueueJsonObject/Array/Lines` + special-case writers (chunk meta writer). +- Schema definitions are implicit in “writer payload construction” and spread across multiple modules. +- Multiple identifiers exist (`chunk.id`, `metaV2.chunkId`, graph keys `file::name`), which increases the chance of accidental drift. + +**Remaining work** + +- [ ] Introduce a single “artifact writer” abstraction with a consistent interface: + - [ ] `write(name, payload | iterator, { format, sharded, compression, pieceType })` + - [ ] built-in cleanup rules and directory-level atomic swaps + - [ ] standard metadata (version, generatedAt, schemaVersion) + (Impacts: `src/index/build/artifacts.js`, `src/index/build/artifacts/writers/*`) + +- [ ] Separate schema definitions from I/O: + - [ ] define schemas for artifacts in a central module (even if only via JS object contracts + comments) + - [ ] ensure docs mirror those schema definitions + (Impacts: `docs/artifact-contract.md`, `docs/contracts/indexing.md`) + +- [ ] Create a single canonical chunk-id generator and use it everywhere: + - [ ] prefer `metaV2.chunkId` (content-based) for graphs/relations keys instead of ad-hoc `file::name` + - [ ] ensure assembled and non-assembled builds produce identical chunkIds + (Impacts: `src/index/build/graphs.js`, and any code producing chunk identifiers) + +--- + +## 27.6 Tests (Checklist F) + +**Audit** + +In-scope tests are generally helpful and cover: +- `.bak` recovery semantics (`tests/artifact-bak-recovery.js`) +- artifact precedence formats (`tests/artifact-formats.js`) +- size guardrails forcing sharding (`tests/artifact-size-guardrails.js`) +- shard planning (`tests/shard-plan.js`) +- shard vs non-shard equivalence (`tests/shard-merge.js`) +- quantization correctness (`tests/postings-quantize.js`) +- incremental tokenization caching (`tests/incremental-tokenization-cache.js`) + +However, multiple tests are still existence/shape-heavy and do not verify semantic meaning deeply, especially around assembled outputs and import scanning. + +**Gaps / issues** + +- `tests/file-processor/cached-bundle.test.js` uses shapes for `allImports` and `codeRelations.calls` that do not match the likely real shapes; it can pass while not meaningfully validating correctness. +- No tests cover: + - chunk-meta cleanup when switching formats (P0 issue) + - compressed sidecar `.json.gz` artifacts and their `.bak` semantics + - partial shard write behavior (meta missing, orphan parts, etc.) 
+ - import scanning correctness for dynamic imports / es-module-lexer record handling + - deterministic `importLinks` ordering + - perf regression for `compact-pieces` / `assembleIndexPieces` + +**Remaining work** + +- [ ] Strengthen artifact format tests to assert semantic meaning: + - [ ] verify loader precedence (meta/parts vs jsonl vs json) in more combinations + - [ ] verify meta.parts path normalization and correctness + +- [ ] Add regression tests for atomic write failures: + - [ ] simulate rename failures (via dependency injection or controlled FS behavior) + - [ ] assert `.bak` fallback and cleanup behavior + +- [ ] Add regression tests for partial shard writes: + - [ ] parts written, meta missing + - [ ] meta references missing parts + - [ ] stale orphan parts do not affect reads + +- [ ] Add stress fixtures for large token/postings sets: + - [ ] ensure bounded memory / time + - [ ] ensure canonical ordering remains correct under stress + +- [ ] Add at least one perf regression test: + - [ ] compaction: `tools/compact-pieces.js` + - [ ] assembly: `src/index/build/piece-assembly.js` + +- [ ] Fix `tests/file-processor/cached-bundle.test.js` to use realistic shapes: + - [ ] `allImports` should be `{ [moduleName: string]: string[] }` + - [ ] `codeRelations.calls/usages` should match the real structure used by `buildRelationGraphs()` / `buildCallIndex()` + (File: `tests/file-processor/cached-bundle.test.js`) + +--- + +## Appendix A: File-by-file findings + +This section enumerates each in-scope file and lists file-specific items to address (beyond cross-cutting tasks already listed above). + +### src/index/build/artifacts.js +- [ ] (P1) Consider directory-level atomic swap for `token_postings.shards/` (staging dir + rename). +- [ ] (P1) Normalize shard part paths to POSIX in any meta/manifest structures (avoid OS-separator leakage). +- [ ] (P2) Consider sorting `pieceEntries` by `path` before writing the manifest to reduce diff noise. + +### src/index/build/artifacts/checksums.js +- [ ] (P1) Do not silently accept checksum/stat failures for required pieces; fail or record errors explicitly. + +### src/index/build/artifacts/compression.js +- [ ] (P2) Update docs to clarify that gzip is a sidecar (`.json` and `.json.gz` both exist). +- [ ] (P2) Consider extending compression to sharded artifacts (optional future work). + +### src/index/build/artifacts/file-meta.js +- [ ] (P1) Make file ID assignment stable by sorting unique file paths before assigning IDs. +- [ ] (P1) Add file content hash (and algo) and file size to `file_meta.json`. +- [ ] (P2) Remove or rename `chunk_authors` in file meta (currently derived from the first chunk and not file-level). + +### src/index/build/artifacts/filter-index.js +- [ ] (P2) Consider persisting schema version/config hash in the filter index artifact for easier debugging. + +### src/index/build/artifacts/metrics.js +- [ ] (P2) Do not swallow metrics write errors silently (log or propagate based on severity). + +### src/index/build/artifacts/token-mode.js +- [ ] (P2) Make parsing more robust (case-insensitive modes; integer parsing + clamping). + +### src/index/build/artifacts/writers/chunk-meta.js +- [ ] (P0) Remove stale `chunk_meta.meta.json` and `chunk_meta.parts/` when writing non-sharded JSONL. +- [ ] (P1) Clear or stage-swap `chunk_meta.parts/` when writing sharded output. +- [ ] (P1) Normalize `meta.parts` entries to POSIX paths. +- [ ] (P2) Consider normalizing field naming conventions (`chunk_authors` vs `startLine/endLine`). 
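+
+The P0 cleanup item above, as a sketch (artifact names are from the findings; the helper and its wiring into the writer are assumptions):
+
+```js
+// Cleanup rule for the "jsonl, not sharded" branch: remove every artifact
+// that loadChunkMeta() would prefer over the freshly written JSONL.
+const fsp = require('node:fs/promises');
+const path = require('node:path');
+
+async function removeStaleChunkMeta(outDir) {
+  for (const name of ['chunk_meta.json', 'chunk_meta.json.gz', 'chunk_meta.meta.json']) {
+    await fsp.rm(path.join(outDir, name), { force: true });
+  }
+  await fsp.rm(path.join(outDir, 'chunk_meta.parts'), { recursive: true, force: true });
+}
+```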
+ +### src/index/build/artifacts/writers/file-relations.js +- [ ] (P2) Consider JSONL/sharding for very large `file_relations` outputs; add versioning metadata. + +### src/index/build/artifacts/writers/repo-map.js +- [ ] (P1) Ensure `exported` detection handles default exports correctly (depends on relations schema). +- [ ] (P2) Consider sorting output by `{file, name}` for stability. + +### src/index/build/file-processor.js +- [ ] (P1) Add explicit boundary asserts for chunks after chunking. +- [ ] (P1) Replace `split('\n')` with line-scan utility for context extraction. +- [ ] (P2) Move complexity/lint to per-file scope; avoid repeated per-chunk cache checks. +- [ ] (P2) Fix possible timing double-counting across parse/relation durations. +- [ ] (P1) Add explicit unsupported-language and parse-error skip reasons (configurable). + +### src/index/build/file-processor/assemble.js +- [ ] (P1) Ensure field token fields written here (including `comment`) are consistently supported by postings and piece assembly. + +### src/index/build/file-processor/cached-bundle.js +- [ ] (P2) Validate cached bundle shapes more strictly; ensure importLinks shape is consistent. + +### src/index/build/file-processor/chunk.js +- [ ] (P2) Adjust comment-to-chunk assignment at boundary (`chunk.end === comment.start`) and consider overlap-based assignment. + +### src/index/build/file-processor/incremental.js +- [ ] (P2) Ensure cache invalidation includes schema/version changes for any artifact-impacting changes. + +### src/index/build/file-processor/meta.js +- [ ] (P2) Deduplicate `externalDocs` outputs; consider ordering for determinism. + +### src/index/build/file-processor/read.js +- [ ] (P2) Consider UTF-8 safe truncation (avoid splitting multi-byte sequences mid-codepoint). + +### src/index/build/file-processor/relations.js +- [ ] (P2) Consider sorting/deduping relation arrays (imports/exports/usages) for determinism. + +### src/index/build/file-processor/skip.js +- [ ] (P1) Add explicit unsupported-language skip reason (or document that unknown languages are processed). +- [ ] (P2) Add coverage for `unreadable` and `read-failure` skip paths. + +### src/index/build/file-processor/timings.js +- [ ] (P2) Validate that parse/token/embed durations are not double-counted; document semantics. + +### src/index/build/graphs.js +- [ ] (P2) Prefer canonical `chunkId` keys where possible instead of `file::name` to avoid collisions. +- [ ] (P2) Sort serialized node lists for full determinism (neighbors are already sorted). + +### src/index/build/imports.js +- [ ] (P0) Fix `es-module-lexer` import record handling (`entry.d` is not a specifier string). +- [ ] (P1) Sort and dedupe `importLinks` deterministically; exclude self-links unless explicitly desired. +- [ ] (P1) Ensure concurrency does not affect output ordering (sort module keys and file arrays before serialization). + +### src/index/build/piece-assembly.js +- [ ] (P0) Make `validateLengths()` strict when `expected > 0`. +- [ ] (P0) Merge all field postings (including `comment`) and docLengths based on actual input keys. +- [ ] (P1) Canonicalize vocab ordering in assembled outputs. +- [ ] (P2) Remove redundant filterIndex construction (avoid double work; rely on writeIndexArtifacts). + +### src/index/build/postings.js +- [ ] (P1) Canonicalize vocab ordering (token/phrase/chargram/field) explicitly. +- [ ] (P2) Validate docLengths are finite and consistent; avoid NaN avgDocLen. 
+- [ ] (P2) Sort Object.entries() iteration for field postings and weights for deterministic output. + +### src/index/build/shards.js +- [ ] (P1) Add explicit tie-breakers in weight-based sorts/batching for determinism across runtimes. +- [ ] (P2) Document heuristic thresholds (minFilesForSubdir, hugeThreshold, tenth-largest targets). + +### src/index/build/tokenization.js +- [ ] (P2) Review buffer reuse effectiveness (arrays are still cloned); consider pre-sizing and reducing transient allocations further. + +### tools/assemble-pieces.js +- [ ] (P1) Sort `inputDirs` by default (or add `--sort`) to ensure deterministic assembled output. +- [ ] (P2) When `--force` is used, consider cleaning the output dir first to avoid stale artifacts. + +### tools/ci-build-artifacts.js +- [ ] (P1) Sanitize remote URLs before writing them to `manifest.json` to avoid leaking credentials. + +### tools/ci-restore-artifacts.js +- [ ] (P2) Optionally validate `pieces/manifest.json` checksums after restore (fast fail on corrupt artifacts). + +### tools/compact-pieces.js +- [ ] (P1) Consider directory-level atomic swap semantics (avoid rm+rename window). +- [ ] (P2) Add perf regression harness and validate output equivalence post-compaction. + +### tests/artifact-bak-recovery.js +- [ ] (P2) Expand coverage to include: both primary and backup corrupt; json.gz sidecars; and cleanup expectations. + +### tests/artifact-formats.js +- [ ] (P1) Add explicit precedence test: sharded meta/parts must not override fresh jsonl when shards are stale (post-fix). + +### tests/artifact-size-guardrails.js +- [ ] (P2) Extend to cover: chunkMetaFormat=jsonl with switching shard/no-shard, and cleanup behavior. + +### tests/artifacts/file-meta.test.js +- [ ] (P1) Update test if file ID assignment is changed to sorted-by-path; assert stability across different chunk orders. + +### tests/artifacts/token-mode.test.js +- [ ] (P2) Add coverage for invalid modes, case-insensitive parsing, and maxTokens/maxFiles parsing edge cases. + +### tests/clean-artifacts.js +- [ ] (P2) Consider adding a check that `.bak` files are handled correctly (optional). + +### tests/file-processor/cached-bundle.test.js +- [ ] (P1) Fix test fixtures to use realistic `allImports` and `codeRelations` shapes, and assert semantic correctness (not only presence). + +### tests/file-processor/skip.test.js +- [ ] (P2) Add coverage for `unreadable` and `read-failure` paths (permissions, ENOENT races). + +### tests/filter-index-artifact.js +- [ ] (P2) Add a schema assertion for filter_index fields/versioning to prevent drift. + +### tests/filter-index.js +- [ ] (P2) Consider adding a determinism check for serialized filter index (same inputs => same output). + +### tests/graph-chunk-id.js +- [ ] (P2) Add a collision regression test for graph keys, or migrate to chunkId-based keys. + +### tests/incremental-tokenization-cache.js +- [ ] (P2) Add a second invalidation scenario (e.g., tokenization config changes that affect stemming/synonyms). + +### tests/piece-assembly.js +- [ ] (P1) Add semantic equivalence test vs monolithic build and add a determinism test (same inputs => identical assembled output). + +### tests/postings-quantize.js +- [ ] (P2) Extend to test scale and dims, and doc/code embedding behavior. + +### tests/shard-merge.js +- [ ] (P2) Consider adding checksum and manifest equivalence checks as well. + +### tests/shard-plan.js +- [ ] (P2) Add stress case coverage (many files, equal weights, perfProfile enabled). 
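+
+For the determinism items above (`shards.js` tie-breakers, `postings.js` ordering), a minimal sketch of a locale-independent comparator with an explicit tie-break; the field names are assumptions:
+
+```js
+// Never use localeCompare in ordering-critical paths: it can vary with ICU
+// data and locale. Compare weights first, then break ties by code-unit
+// order on a stable identifier.
+function compareShards(a, b) {
+  if (b.weight !== a.weight) return b.weight - a.weight; // heaviest first
+  return a.label < b.label ? -1 : a.label > b.label ? 1 : 0;
+}
+```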
+ +### tests/tokenization-buffering.js +- [ ] (P2) Consider adding a non-ASCII tokenization regression case. + +### docs/artifact-contract.md +- [ ] (P1) Fix compression description (no embedded `compression` field) and clarify `.json.gz` sidecar semantics. +- [ ] (P1) Add explicit precedence rules (meta/parts vs jsonl vs json). +- [ ] (P2) Add schema examples for meta files and `pieces/manifest.json`. + +### docs/contracts/coverage-ledger.md +- [ ] (P2) Add entries for new/critical tooling: `tools/assemble-pieces.js`, `tools/compact-pieces.js`, and CI artifact scripts. + +### docs/contracts/indexing.md +- [ ] (P1) Clarify which artifacts are “required” vs “optional/configurable” (e.g., minhash signatures). +- [ ] (P1) Document sharded meta schema and loader precedence. + + +## Phase 28 — Section 2 — Index build orchestration review (findings + required fixes) + +### Executive summary: highest-priority issues (fix first) + +#### Correctness / functional + +- [ ] **Sharding path creates fresh worker pools + queues per shard work item, with no explicit teardown.** + This is very likely to cause thread/resource leaks, excessive pool creation overhead, and/or a build process that does not exit cleanly. + _Primary file:_ `src/index/build/indexer/steps/process-files.js` + _Related:_ `src/index/build/runtime/workers.js`, `src/index/build/worker-pool.js` + +- [ ] **`--mode all` behavior is inconsistent with “extracted-prose” expectations (tests + CLI surface).** + `tests/build-index-all.js` expects an `extracted-prose` index to be produced for `--mode all`, and `parseBuildArgs(...)` already resolves `modes` to include it; however the CLI entry (`build_index.js`) discards the computed `modes` and delegates to the core build entry, which (in the current tree) resolves “all” differently. + _Primary file(s) in scope:_ `build_index.js`, `src/index/build/args.js`, `tests/build-index-all.js` + _Note:_ the root cause may live outside this section’s file list, but the mismatch is observable from the files in scope and should be corrected at the boundary. + +- [ ] **Watch debounce scheduler does not safely handle async `onRun` errors (risk of unhandled promise rejection).** + `createDebouncedScheduler(...)` calls `onRun()` without `await`/`.catch(...)`. In `watchIndex(...)`, `onRun` is async. Any unexpected throw/rejection (e.g., from lock release, filesystem exceptions) can become an unhandled rejection. + _Primary file:_ `src/index/build/watch.js` + +#### Determinism / reproducibility + +- [ ] **Locale-dependent sorts in ordering-critical paths (`localeCompare`) should be replaced with deterministic lexicographic compares.** + Ordering drives chunk IDs, manifest key ordering, and shard planning stability; `localeCompare` can vary by ICU/locale. + _Primary files:_ + - `src/index/build/indexer/steps/discover.js` + - `src/index/build/indexer/steps/process-files.js` + - `tools/shard-census.js` + +#### Incremental correctness across versions + +- [ ] **Incremental cache signature likely needs a “tool/build schema version” component.** + Today, signature invalidation is strongly config-based. If tokenization/chunk schema/postings semantics change across releases without config changes, the cache can be reused incorrectly. + _Primary file:_ `src/index/build/indexer/signatures.js` + _Related:_ `src/index/build/incremental.js`, `tests/incremental-*.js` + +--- + +### A. 
Pipeline mapping and boundaries + +#### A.1 Current pipeline map (as implemented) + +**Audit** + +The index build pipeline, as observable from the files in scope, is structured as: + +1. **CLI entry** + - `build_index.js` → parses args and calls the core build entry with `argv` + `rawArgv`. + +2. **Runtime construction** + - `src/index/build/runtime.js` → `createBuildRuntime(...)` + - `src/index/build/runtime/runtime.js` → loads config(s), applies stage overrides (`runtime/stage.js`), resolves caps/guardrails (`runtime/caps.js`), ignore rules (`ignore.js`), concurrency and queues/pools (`runtime/workers.js`, `worker-pool.js`), crash logging (`crash-log.js`), and creates a build output root. + +3. **Mode build orchestration** + - `src/index/build/indexer.js` → `buildIndexForMode(...)` for each mode. + - `src/index/build/indexer/pipeline.js` coordinates the build steps per mode. + +4. **Per-mode pipeline stages** + - **Discover**: `indexer/steps/discover.js` (uses `discover.js` + optional preprocessed discovery) + - **Incremental plan + whole-index reuse**: `indexer/steps/incremental.js` (wraps `incremental.js`) + - **Relations pre-scan**: `indexer/steps/relations.js` (`preScanImports`) + - **Estimate context window**: `estimateContextWindow(...)` (not in scope; used by pipeline) + - **Process files**: `indexer/steps/process-files.js` + - optional sharding plan execution + - per-file chunking + postings accumulation + incremental bundle read/write + - **Relations post-scan + cross-file inference**: `indexer/steps/relations.js` (`postScanImports`, `runCrossFileInference`) + - **Incremental manifest pruning**: `incremental.js` (`pruneIncrementalManifest(...)`) + - **Postings build**: `indexer/steps/postings.js` + - **Write artifacts**: `indexer/steps/write.js` + - **Optional**: enqueue embeddings job when using an external embeddings service (called from pipeline) + +5. **Promotion** + - `src/index/build/promotion.js` writes/updates a `current.json` pointer to a successful build root (promotion is performed outside the per-mode pipeline). + +**Contract boundaries (recommended)** + +- The pipeline currently “spans layers” in a few places: + - CLI args parsing (“mode all”) and computed mode lists are not consistently treated as an API contract boundary. + - Sharding logic (planning + execution) creates runtime sub-instances rather than remaining a pure scheduling layer. + - Incremental state is mutated from multiple steps (process-files + relations cross-file inference updates). + +These are workable, but they heighten the importance of clear contracts/invariants per stage. + +--- + +#### A.2 Stage-by-stage contracts (inputs/outputs/invariants/errors/determinism) + +> This section captures what the code *currently* does, plus what should be made explicit (and tested). 
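+
+One way to make these contracts checkable rather than merely documented (a sketch; the entry shape is taken from the Discover stage below, and the assert helper is an assumption):
+
+```js
+// Dev/test-only invariant check for the Discover stage output.
+// entries: [{ abs, rel, stat: { size, mtimeMs }, orderIndex }, ...]
+function assertDiscoverInvariants(entries) {
+  let prev = '';
+  for (const e of entries) {
+    if (!e.rel || e.rel.includes('..')) throw new Error(`bad rel key: ${e.rel}`);
+    // Code-unit comparison: deterministic and locale-independent.
+    if (e.rel < prev) throw new Error(`entries not sorted at ${e.rel}`);
+    prev = e.rel;
+  }
+}
+```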
+ +##### Stage: Discover + +**Primary implementation** +- `src/index/build/indexer/steps/discover.js` +- `src/index/build/discover.js` + +**Inputs** +- `runtime.root`, `runtime.ignoreMatcher`, `runtime.maxFileBytes`, `runtime.fileCaps`, `runtime.guardrails` (maxDepth/maxFiles), mode (`code`/`prose`/`extracted-prose`) +- Optional precomputed discovery bundle `{ entries, skippedFiles, lineCounts }` from preprocessing (if provided by orchestration layer) + +**Outputs** +- `state.entries`: ordered list of discovered file entries +- `state.skippedFiles`: per-mode skips (plus common skips) +- Entries are annotated with `orderIndex` for deterministic downstream ordering + +**Invariants** +- Entries must have: + - `abs` absolute path + - `rel` repo-relative path (POSIX form) with no `..` + - `stat` with at least `size`, `mtimeMs` +- Deterministic ordering: sorting by `rel` must be stable and locale-independent. +- `skippedFiles` should preserve a stable ordering for reproducibility (currently sorted in discover.js). + +**Error behavior** +- Per-file stat errors or size cap failures are recorded as skips, not fatal errors. +- Discover-level failures (e.g., inability to crawl filesystem) should throw and abort build. + +**Determinism requirements** +- Must not use locale-sensitive comparisons (`localeCompare`) or OS-dependent casing assumptions. +- Normalize paths consistently (POSIX rel keys). + +**Remaining work** +- [ ] Replace locale-dependent sorting in `indexer/steps/discover.js` with deterministic compare (and document determinism requirement). +- [ ] Consider adding `stat.isFile()` checks (defensive) before admitting entries (especially for non-git discovery paths). +- [ ] Consider making “tracked-only” behavior explicit at the API boundary (discover uses `git ls-files` when root is a git repo root) and ensure watch mode semantics align (see Watch section). + +--- + +##### Stage: Incremental plan / reuse + +**Primary implementation** +- `src/index/build/indexer/steps/incremental.js` +- `src/index/build/incremental.js` +- `src/index/build/indexer/signatures.js` + +**Inputs** +- `outDir` (mode-specific index output dir) +- `tokenizationKey` (derived from dict signature + tokenization/postings config) +- `cacheSignature` (derived from broader runtime feature/config surface) +- current discovered entries list + their `stat` for whole-index reuse decision + +**Outputs** +- `incrementalState` with: + - `manifest` (files, signature, tokenizationKey, bundleFormat, shards metadata) + - `bundleDir` + bundle format +- `reused` boolean indicating full-index reuse (early exit) +- For per-file reuse, `readCachedBundle(...)` is used by file processor layer. + +**Invariants** +- `manifest.files` keys represent the exact set of indexed files, keyed by deterministic relKey. +- Whole-index reuse must only return true if: + - stage coverage is sufficient for requested stage + - manifest key set matches current entries key set (including deletions) + - size + mtime checks match for all files (or an approved hash fallback mechanism is used) + - signature + tokenizationKey match + +**Error behavior** +- Corrupt/missing manifest should fall back to “rebuild” (not crash). +- Bundle read failures should fall back to “recompute file” (not crash), unless explicitly configured otherwise. + +**Determinism requirements** +- Signature computation must be stable (`stableStringify` is used). +- Manifest writing should be stable in structure and ordering (even if JSON object key order is mostly stable in practice). 
+ +**Remaining work** +- [ ] Add an explicit “cache schema / tool version” component to `cacheSignature` (or a separate `cacheSchemaVersion` field checked alongside it). +- [ ] Treat `manifest.version` as a compatibility gate (migrate or reset when unsupported); ensure `manifest.files` is validated as a *plain object* (not an array). +- [ ] Decide whether whole-index reuse should allow hash fallback (currently it is strict on mtime/size) — if yes, add an opt-in and tests. + +--- + +##### Stage: Process files (chunking + postings accumulation) + +**Primary implementation** +- `src/index/build/indexer/steps/process-files.js` +- `src/index/build/state.js` +- `src/index/build/file-scan.js` (via file processor layer) +- `src/index/build/workers/indexer-worker.js` (worker pool tokenization) +- `src/index/build/worker-pool.js`, `src/index/build/runtime/workers.js` (pool + queue orchestration) + +**Inputs** +- Ordered entries list with `orderIndex` +- Runtime config: tokenization config, postings config, feature flags, caps/guardrails, worker pool config, concurrency limits, sharding config +- Incremental state with manifest + bundle directory +- Optional import map from pre-scan stage + +**Outputs** +- Mutated `state`: + - `chunks` (+ `chunkMeta`) + - `tokenPost`, `phrasePost`, `trigramPost`, `chargramPost` + - `df`, `docLengths`, `fileRelations`, `importLinks` + - `fileMeta` and `fileChunkMap` + - `totalTokens`, `totalChunks` + - `skippedFiles` additions for per-file failures +- `tokenizationStats` + `shardSummary` + `shardPlan` (for reporting and later artifact writing) +- Incremental manifest updates + bundle writes for non-cached files + +**Invariants** +- Chunk IDs must be assigned deterministically and match the ordering derived from discovered entries (not processing completion order). + - Current mechanism: `orderedAppender` ensures deterministic append order even with concurrency/sharding. +- Postings and DF must reflect the same token stream used to produce chunk meta. +- For cached files: + - The cached bundle contents must be compatible with the current tokenizationKey/signature. + - Cached chunks must be appended in the same deterministic order. + +**Error behavior** +- Per-file failures: retry per `indexingConfig.fileRetries` (via `runWithQueue` retry handling); if ultimately failing, abort build (current behavior). +- Crash logging is best-effort (debug mode only). + +**Determinism requirements** +- Ordering must not depend on concurrency, sharding, or locale settings. +- Any feature that modifies existing chunks (token retention “auto”, cross-file inference update) must be deterministic given the same inputs. + +**Remaining work** +- [ ] Fix sharding runtime lifecycle (see Section C/D): avoid creating worker pools per shard item; ensure explicit teardown; ensure sharding does not leak threads/handles. +- [ ] Replace localeCompare usage in shard plan sorting with deterministic ordering. +- [ ] Consider exposing and testing a “deterministic build mode” in which timestamps/build IDs do not affect artifact contents (at least for core artifacts). 
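+
+For context, the ordered-append mechanism referenced in the invariants above can be sketched as a small buffer keyed by `orderIndex` (illustrative; the real `orderedAppender` may differ):
+
+```js
+// Buffers out-of-order completions and flushes strictly in orderIndex
+// order, so concurrency and sharding cannot change append order.
+function createOrderedAppender(append) {
+  const pending = new Map();
+  let next = 0;
+  return (orderIndex, item) => {
+    pending.set(orderIndex, item);
+    while (pending.has(next)) {
+      append(pending.get(next));
+      pending.delete(next);
+      next += 1;
+    }
+  };
+}
+
+// Usage: results may arrive in any order; output order is deterministic.
+const out = [];
+const appendInOrder = createOrderedAppender((item) => out.push(item));
+appendInOrder(1, 'b');
+appendInOrder(0, 'a'); // flushes 'a' then 'b'
+```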
+ +--- + +##### Stage: Relations (import scan + cross-file inference) + +**Primary implementation** +- `src/index/build/indexer/steps/relations.js` +- `src/index/build/feature-metrics.js` (for reporting) + +**Inputs** +- `state.fileRelations` from per-file processing (and/or pre-scan) +- runtime feature flags: + - `indexingConfig.importScan` + - `typeInferenceEnabled`, `riskAnalysisEnabled` + - `*CrossFileEnabled` flags +- incremental state (to update cached bundles after cross-file inference) + +**Outputs** +- `state.importLinks` from `postScanImports` +- Optionally updated `state.chunks` and file metadata from `applyCrossFileInference` +- `graphRelations` structure for index artifacts +- Optional incremental bundle updates via `updateIncrementalBundlesWithChunks(...)` + +**Invariants** +- importLinks should be stable given stable fileRelations + scan plan. +- If cross-file inference updates are applied: + - updates must be reflected in persisted incremental bundles (or explicitly excluded) + - index artifacts written later must correspond to the updated state. + +**Error behavior** +- Import scan failures should degrade gracefully (ideally mark relations as unavailable and continue) unless configured otherwise. +- Cross-file inference failures should not leave state partially mutated; either apply atomically or abort. + +**Determinism requirements** +- Import scan output ordering should be stable. +- Graph construction should be stable (avoid hash/map iteration nondeterminism in serialization). + +**Remaining work** +- [ ] Add tests ensuring cross-file inference updates are persisted into incremental bundles when enabled. +- [ ] Clarify the artifact contract for `graphRelations` in `index_state.json` and ensure it is versioned. + +--- + +##### Stage: Postings build + +**Primary implementation** +- `src/index/build/indexer/steps/postings.js` + +**Inputs** +- `state` with postings sets + DF + doc lengths + chunks +- `runtime.postingsConfig`, token retention configuration + +**Outputs** +- A postings artifact structure ready for serialization (plus metrics like context window) +- Optional token retention adjustments applied to chunks (auto) + +**Invariants** +- Postings must refer to valid chunk IDs. +- DF counts must align with unique tokens per doc. +- Token retention must not change postings/DF (only the retained token/gram arrays stored in chunks for downstream consumers). + +**Error behavior** +- Failures should abort (postings are core artifact). + +**Determinism requirements** +- Postings list ordering must be stable (e.g., chunk IDs sorted ascending). +- DF computation must not depend on processing order (it currently does not, provided chunk order is deterministic). + +**Remaining work** +- [ ] Add/verify tests around token retention “auto” switching (sample vs none) to ensure artifact stability and correctness. + +--- + +##### Stage: Write artifacts + promotion + +**Primary implementation** +- `src/index/build/indexer/steps/write.js` +- `src/index/build/promotion.js` +- `src/index/build/build-state.js` (build_state.json) + +**Inputs** +- runtime + mode +- `state`, `postings`, `timing`, `entries`, `shardSummary`, `graphRelations` +- (promotion) build root + mode list + +**Outputs** +- Mode-specific index directory: + - `index_state.json` + - chunk meta, file meta, postings, perf profile, feature metrics, relations graph +- Promotion pointer file: + - `current.json` mapping mode → build root + +**Invariants** +- Artifact writes should be atomic where practical. 
+- `index_state.json` must contain: + - tool version + config hash + - stage + - tokenizationKey + cacheSignature (if incremental is enabled) + - feature flags summary (for transparency) + +**Error behavior** +- Any write failure should abort promotion; promotion must only occur after successful writes. + +**Determinism requirements** +- Artifact contents (excluding timestamps) should be stable given stable inputs. +- Promotion pointer must not “flip” to a partial build. + +**Remaining work** +- [ ] Validate that `promotion.js` cannot write a `current.json` pointer that escapes the intended cache root (path traversal hardening). +- [ ] Consider making build_state updates resilient to concurrent writes (or explicitly “best effort” with documentation). + +--- + +### B. Incremental builds: deeper review + +#### B.1 What is already solid + +**Audit** + +- Clear separation between: + - tokenizationKey (tokenization + dictionary + postings surface) + - cacheSignature (broader runtime feature surface) +- Per-file bundle read has a hash fallback mechanism to handle mtime/size mismatch scenarios (when a cached hash exists). +- Manifest pruning deletes bundles for deleted files (`pruneIncrementalManifest`). +- Whole-index reuse checks stage coverage and verifies manifest key set matches entries key set (including deletions) and validates per-file stat checks (`shouldReuseIncrementalIndex`). +- A dedicated test suite exists for: + - signature invalidation (`tests/incremental-cache-signature.js`) + - manifest updates (`tests/incremental-manifest.js`) + - reuse semantics including deletions (`tests/incremental-reuse.js`) + - incremental plan behavior (`tests/indexer/incremental-plan.test.js`) + +#### B.2 Gaps / risks + +**Remaining work (correctness + durability)** + +- [ ] **Cache invalidation across tool updates:** include a “tool version / schema version / algorithm version” in the incremental signature. + Suggested approach: + - Add a `runtime.cacheSchemaVersion` constant (bumped on any semantic change), and include it in `buildIncrementalSignature(...)`. + - Or include `runtime.toolInfo.version` (and document that caches are invalidated across versions). +- [ ] **Manifest version compatibility:** enforce `manifest.version` compatibility explicitly; if unsupported, reset (and optionally delete bundles). + Also validate `manifest.files` is a plain object: `loaded.files && typeof loaded.files === 'object' && !Array.isArray(loaded.files)`. +- [ ] **Bundle cleanup on invalidation:** when signature/tokenizationKey mismatches, consider deleting the bundles directory (or moving aside) to avoid disk bloat. +- [ ] **Whole-index reuse strictness:** decide if whole-index reuse should support content-hash fallback for stat mismatch (opt-in). + If not, document that mtime/size must match exactly, and why (performance vs safety). +- [ ] **Stage interactions:** confirm and test that: + - stage1 builds do not reuse stage2 caches (signature should differ, but confirm) + - stage2 builds do not reuse stage1 caches + - stage4 behaviors are consistent (if stage4 writes different artifact sets) +- [ ] **RelKey normalization:** ensure relKey generation is consistently POSIX and case-handled on Windows for both discovery and watch paths. + +--- + +### C. 
Concurrency and robustness + +#### C.1 Locking + +**Audit** + +- `src/index/build/lock.js` implements: + - atomic lock acquisition via `fs.open(lockPath, 'wx')` + - stale lock detection via pid + timestamp (and mtime fallback) + - optional wait/poll to acquire lock + +**Remaining work** +- [ ] Ensure the lock file handle is closed even if `writeFile(...)` fails (use try/finally around the acquired `handle`). +- [ ] Consider including `buildId` and `mode(s)` in the lock file payload to improve observability/debugging. +- [ ] Add a test that simulates write failure during lock acquisition (can be done by injecting a stubbed fs layer, or by creating a read-only directory). + +#### C.2 Sharding + queues + worker pools + +**Audit** + +- The pipeline uses a queue abstraction (`createTaskQueues`, `runWithQueue`) and worker pools (`Piscina`) to parallelize CPU-heavy tasks. +- Sharding aims to distribute work based on line counts / cost predictions, while preserving deterministic output ordering via an ordered appender. + +**Remaining work (critical)** +- [ ] **Do not create worker pools per shard item.** + Options (choose one): + 1) **Preferred:** share the parent runtime’s worker pools across all shards; only shard the scheduling/queueing. + 2) If per-shard pools are required: create **one** shard runtime per shard worker (batch), reuse it for all work items in that batch, and **always** `destroy()` pools and tear down queues in a `finally`. +- [ ] Add a regression test / harness that runs a sharded build and asserts the process exits promptly (no lingering worker threads). + Practical approach: spawn `node build_index.js ...` with `--shards.enabled` and ensure it exits within a timeout; also enable `--verbose` to detect repeated pool creation. +- [ ] Audit `maxPending` sizing on queues in shard runtime creation; ensure it cannot exceed a safe bound when shard concurrency is high. + +#### C.3 Watch mode robustness + +**Audit** + +- Watch mode uses chokidar and a debounce scheduler to coalesce changes. +- It maintains a tracked file set to decide whether removals/oversize transitions should trigger rebuilds. +- It always enables incremental to avoid full reindexing on every change. + +**Remaining work** +- [ ] Make `createDebouncedScheduler(...)` safe for async `onRun`: + - wrap `onRun()` in `Promise.resolve(...).catch(...)` + - optionally provide an `onError` callback +- [ ] Ensure “extracted-prose only” watch mode is supported: + - update `isIndexablePath(...)` to treat `extracted-prose` as both `code` and `prose` for extension filtering + - add coverage in `tests/watch-filter.js` +- [ ] Decide how to handle untracked file changes in git repos (discover is tracked-only): + - either document that watch will trigger rebuilds but new untracked files will not be indexed + - or add an optional “include untracked” mode for watch builds (with tests) + +--- + +### D. Performance and scalability + +#### D.1 Discovery and preprocessing overhead + +**Audit** + +- Discovery uses `git ls-files -z` when root is the git repo root, otherwise fdir crawl. +- It performs a per-file `fs.stat` in a sequential loop (async, but awaited one-by-one). +- Preprocess stage can scan file headers to detect binary/minified, and optionally count lines. + +**Remaining work** +- [ ] Parallelize `fs.stat` in discovery with a concurrency limit (e.g., 32) to reduce wall-clock time on large repos. +- [ ] Consider using fdir’s `withStats()` to avoid a separate stat syscall for non-git discovery paths. 
+- [ ] Ensure file-type detection does not misclassify common text types as binary (treat certain `application/*` mimes as text if needed). + +#### D.2 Sharding overhead + +**Audit** + +- Sharding may require a full line-count pass (expensive) unless line counts are provided. +- Shard planning uses predicted cost from perf profiles when available. + +**Remaining work** +- [ ] Add an option to avoid full line counting when perf profile is available and sufficiently fresh (approximate weights). +- [ ] Revisit per-shard file concurrency hard cap (`min(2, ...)`) — it can underutilize configured `runtime.fileConcurrency` on larger machines. +- [ ] Avoid per-shard runtime creation (performance + correctness; see Section C). + +#### D.3 Worker pool overhead + +**Audit** + +- Worker tasks validate cloneability of inputs/outputs for each task (deep scan with limits). +- Worker pool supports restart/backoff, and permanent disable on repeated opaque failures. + +**Remaining work** +- [ ] Gate cloneability validation behind a debug flag or environment variable; keep it on by default in CI/tests, off in production, or vice versa (choose explicitly). +- [ ] Consider using transfer lists for large typed arrays in quantize tasks to reduce cloning overhead. +- [ ] Add metrics to quantify: + - pool restart frequency + - clone-check overhead + - task latency distribution + +--- + +### E. Refactoring / code quality / test gaps + +#### E.1 Duplication and clarity + +**Audit** + +- Multiple modules duplicate “max bytes per extension” logic and cap normalization: + - `discover.js` has `resolveMaxBytesForExt` + - `watch.js` has `maxBytesForExt` + - `tools/shard-census.js` has its own normalization helpers +- Ordering uses both explicit `<` comparisons and `localeCompare` in different places. + +**Remaining work** +- [ ] Centralize “max bytes per extension” and “cap normalization” logic into a single helper module (likely `runtime/caps.js` or a shared `file-caps.js`) and reuse across discover/watch/tools. +- [ ] Standardize ordering comparisons: provide a shared `compareRelPaths(a, b)` helper that is locale-independent and (optionally) Windows-case-aware. +- [ ] Run formatter / lint pass on files with inconsistent indentation (not functionally wrong, but increases diff noise and review friction). + +#### E.2 Tests to add or strengthen + +**Remaining work** +- [ ] **Build all modes:** Ensure `tests/build-index-all.js` reliably enforces that `--mode all` produces `code`, `prose`, and `extracted-prose` artifacts (and fix the orchestration boundary if currently inconsistent). +- [ ] **Watch extracted-prose:** add a case to `tests/watch-filter.js` where `modes=['extracted-prose']` and confirm indexable file changes trigger scheduling. +- [ ] **Watch async error safety:** add a test that uses an async `onRun` that rejects once, and assert no `unhandledRejection` occurs (attach a listener in the test). +- [ ] **Sharding teardown:** add a harness test that enables sharding and asserts no lingering worker threads prevent exit. +- [ ] **Incremental schema version:** add a test that simulates a tool version/schema version change and confirms caches are invalidated. + +--- + +### File-by-file findings (actionable) + +> Items below are intentionally concrete and file-scoped to minimize ambiguity. + +#### `build_index.js` + +- [ ] Pass the resolved `modes` from `parseBuildArgs(...)` through to the build orchestrator (or otherwise guarantee that “mode all” resolves identically at every boundary). 
+ _Why:_ prevents drift between CLI arg parsing and internal orchestration; aligns with `tests/build-index-all.js`. + +#### `src/index/build/args.js` + +- [ ] Consider adding `argv.modes` (or similar) so downstream layers do not need to re-derive the “all → modes” mapping (and so the CLI entry can pass a single object). + +#### `src/index/build/build-state.js` + +- [ ] Document that `build_state.json` is best-effort and may lose updates under concurrent writers; or introduce an append-only/event model to prevent lost updates. +- [ ] Consider `timer.unref()` on heartbeat interval for cases where build-state heartbeat should not keep the process alive (optional). + +#### `src/index/build/crash-log.js` + +- [ ] Consider throttling `updateFile(...)` writes when debug crash logging is enabled (currently potentially writes state on every file). + +#### `src/index/build/discover.js` + +- [ ] Add concurrency-limited parallel statting for large repos. +- [ ] Add defensive `stat.isFile()` gating for non-git crawls. + +#### `src/index/build/failure-taxonomy.js` + +- No blocking issues found in scope; consider expanding taxonomy categories over time as needed. + +#### `src/index/build/feature-metrics.js` + +- No blocking issues found; consider adding an explicit schema version to metrics output to support future evolution. + +#### `src/index/build/file-scan.js` + +- [ ] Treat certain `file-type` “application/*” results (e.g., json/xml) as potentially text, or ensure `file-type` is only advisory and always confirm with istextorbinary when in doubt. +#### `src/index/build/ignore.js` + +- [ ] Consider supporting nested `.gitignore` semantics for non-git discovery paths (optional, but improves parity with developer expectations). + +#### `src/index/build/incremental.js` + +- [ ] Validate `manifest.files` is a plain object; reset if array/invalid. +- [ ] Enforce manifest version compatibility; reset or migrate. +- [ ] Consider deleting stale bundles on signature/tokenizationKey mismatch to avoid disk bloat. + +#### `src/index/build/indexer.js` + +- No major issues; ensure per-mode runtime mutations are intentional and documented. + +#### `src/index/build/indexer/pipeline.js` + +- [ ] Ensure any ordering-critical sorts remain locale-independent (primary issue is in discover step; pipeline relies on it). +- [ ] Consider explicitly documenting the per-mode stage graph and how it maps to artifacts and cache signature components. + +#### `src/index/build/indexer/signatures.js` + +- [ ] Add cache schema / tool version component to `buildIncrementalSignature(...)`. +- [ ] Consider adding explicit versions for: + - chunk schema + - postings schema + - relations graph schema + +#### `src/index/build/indexer/steps/discover.js` + +- [ ] Replace `localeCompare` sort with deterministic compare. +- [ ] Avoid mutating shared entry objects if discovery is reused across modes (optional; low risk today, but cleaner). + +#### `src/index/build/indexer/steps/incremental.js` + +- [ ] Add more granular status reporting (e.g., why reuse rejected) for observability; currently logs are decent but could be structured. + +#### `src/index/build/indexer/steps/postings.js` + +- [ ] Add tests for token retention “auto” switching correctness and stability. + +#### `src/index/build/indexer/steps/process-files.js` + +- [ ] Fix sharding runtime lifecycle (do not create per-work-item pools; ensure teardown). +- [ ] Replace localeCompare in shard plan sorting with deterministic compare. +- [ ] Revisit per-shard concurrency cap (min(2, ...)). 
+- [ ] Consider hoisting shard runtime creation outside the inner work-item loop if per-shard runtime instances remain desired. + +#### `src/index/build/indexer/steps/relations.js` + +- [ ] Add tests ensuring cross-file inference updates are persisted into incremental bundles when enabled. +- [ ] Clarify error strategy for import scan failures (degrade vs abort) and encode it in tests/config. + +#### `src/index/build/indexer/steps/write.js` + +- [ ] Ensure `index_state.json` always includes the correct cache signature / tokenizationKey values used for the build (especially when any runtime config is adapted per mode). + +#### `src/index/build/lock.js` + +- [ ] Close file handle in a `finally` if write fails during lock acquisition. + +#### `src/index/build/perf-profile.js` + +- No major correctness issues; consider exporting a schema version. + +#### `src/index/build/preprocess.js` + +- [ ] Document that preprocess is currently for `code` + `prose` only (or extend support to `extracted-prose` explicitly if desired). + +#### `src/index/build/promotion.js` + +- [ ] Harden path handling so `current.json` cannot point outside `repoCacheRoot` even if inputs are malformed. + +#### `src/index/build/runtime.js` + +- No blocking issues found in scope. + +#### `src/index/build/runtime/caps.js` + +- No blocking issues found; consider consolidating cap normalization usage across tools. + +#### `src/index/build/runtime/hash.js` + +- No blocking issues found. + +#### `src/index/build/runtime/logging.js` + +- No blocking issues found; consider documenting the distinction between structured logs and progress logs. + +#### `src/index/build/runtime/runtime.js` + +- [ ] Consider making the “tracked-only discovery” behavior visible in logs when git is used (helps users understand why new files may not be indexed). +- [ ] Consider ensuring any per-mode adaptive config does not bleed across modes (currently low risk, but worth documenting). + +#### `src/index/build/runtime/stage.js` + +- No blocking issues found; stage overrides appear coherent and tested (`tests/build-runtime/stage-overrides.test.js`). + +#### `src/index/build/runtime/tree-sitter.js` + +- No blocking issues found in scope. + +#### `src/index/build/runtime/workers.js` + +- [ ] Review queue pending-limit sizing with sharding enabled; ensure worst-case bounds are safe. + +#### `src/index/build/state.js` + +- No blocking issues found; consider adding explicit assertions/guards in merge functions to prevent mismatched id offsets if used elsewhere. + +#### `src/index/build/watch.js` + +- [ ] Make debounce scheduler safe for async `onRun` (catch rejections). +- [ ] Support `extracted-prose` as a mode for indexable path filtering. +- [ ] Consider reducing rebuild churn from untracked files (optional). + +#### `src/index/build/worker-pool.js` + +- [ ] Consider exposing a “debug clone checks” toggle (ties into worker validation overhead discussion). +- [ ] Add optional transferList support for quantize tasks. + +#### `src/index/build/workers/indexer-worker.js` + +- [ ] Gate cloneability validation behind a debug/config toggle if performance becomes an issue. + +#### `tools/shard-census.js` + +- [ ] Replace `localeCompare` with deterministic compare for stable reporting. +- [ ] Consider reusing shared cap/normalization utilities rather than duplicating. + +#### Tests + +##### `tests/build-index-all.js` + +- [ ] Ensure the build orchestration actually builds `extracted-prose` for `--mode all` (fix boundary mismatch if needed). 
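+
+Related to the `src/index/build/watch.js` items above (and the async-safety test suggested for `tests/watch-filter.js` below), a sketch of an async-safe debounce wrapper; the real `createDebouncedScheduler` signature may differ:
+
+```js
+// Debounce scheduler that never lets an async onRun rejection escape as
+// an unhandled rejection; errors are routed to an optional onError hook.
+function createDebouncedScheduler(onRun, { delayMs = 200, onError } = {}) {
+  let timer = null;
+  return function schedule() {
+    if (timer) clearTimeout(timer);
+    timer = setTimeout(() => {
+      timer = null;
+      Promise.resolve()
+        .then(() => onRun())
+        .catch((err) => {
+          if (onError) onError(err);
+          else console.error('debounced run failed:', err);
+        });
+    }, delayMs);
+  };
+}
+```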
+ +##### `tests/watch-filter.js` + +- [ ] Add an `extracted-prose`-only mode coverage case. +- [ ] Add an async debounce safety test (unhandled rejection prevention). + +##### `tests/worker-pool*.js` + +- No immediate gaps; consider adding a perf regression test if clone checks are made optional. + +--- + +### Deliverables + +- [ ] Fix sharding runtime lifecycle and add regression coverage. +- [ ] Resolve “mode all” / extracted-prose mismatch and ensure `tests/build-index-all.js` passes reliably. +- [ ] Harden watch debounce scheduling against async rejection. +- [ ] Replace localeCompare sorts in ordering-critical paths. +- [ ] Add a cache schema/tool version component to incremental signature and add a test for invalidation. + +### Exit criteria + +- [ ] Sharded builds do not leak worker threads/handles and the process exits cleanly. +- [ ] `--mode all` produces `code`, `prose`, and `extracted-prose` indices; validated by test. +- [ ] Watch mode does not emit unhandled promise rejections under forced error paths. +- [ ] Deterministic ordering is documented and enforced (no locale-dependent sorts in critical ordering paths). +- [ ] Incremental cache reuse is safe across code releases (explicit schema/version invalidation). + + +## Phase 29 — Embeddings & ANN (onnx/HNSW/batching/candidate sets) + +**Objective:** harden the embeddings + ANN stack for correctness, determinism (where required), performance, and resilient fallbacks across **index build**, **build-embeddings tooling**, and **retrieval-time ANN execution**. + +### 29.1 Correctness + +#### 29.1.1 Model identity (cache keys, preprocessing, normalization, dims) + +##### Current state (verified) +- [x] Tooling cache keys include **file hash** + **chunk signature** + **embedding identity** (`tools/build-embeddings/cache.js`, `tools/build-embeddings/run.js`). +- [x] Tooling includes **dims mismatch guardrails** with explicit hard-fail paths and tests (`tools/build-embeddings/embed.js`, `tests/embeddings-dims-mismatch.js`, `tests/embeddings-dims-validation.js`). + +##### Remaining gaps / action items +- [ ] **Expand embedding identity to include preprocessing + provider-specific knobs**, not just `{modelId, provider, mode, stub, dims, scale}`: + - Why: changing `onnx` tokenizer/model path or execution provider can change embeddings without changing `modelId`/`provider`, allowing silent cache reuse. + - Files: + - `tools/build-embeddings/cache.js` (identity schema) + - `tools/build-embeddings/run.js` (identity inputs) + - Add fields (at minimum): + - ONNX: `onnx.modelPath` (resolved), `onnx.tokenizerId`, `onnx.executionProviders`, `onnx.threads`, `onnx.graphOptimizationLevel` + - Common: pooling strategy (mean), `normalize=true`, truncation/max_length policy + - Quantization: `minVal/maxVal` (currently fixed -1..1), quantization “version” +- [ ] **Include a tooling/version fingerprint in cache identity** (or bumpable `identity.version`) so cache invalidates when embedding algorithm changes: + - Why: changes to doc extraction, pooling logic, quantization, or merging should invalidate caches even if file hashes are unchanged. + - Files: `tools/build-embeddings/cache.js`, optionally `tools/build-embeddings/chunks.js` +- [ ] **Add strict provider validation**: unknown `indexing.embeddings.provider` should not silently map to `xenova`. + - Why: silent fallback can produce “correct-looking” but unintended embeddings and cache identity mismatch. 
+ - Files: `src/shared/onnx-embeddings.js` (normalizeEmbeddingProvider), `src/index/embedding.js`, `tools/build-embeddings/cli.js`, `src/retrieval/embedding.js` +- [ ] **Unify default stub embedding dimensions across build + retrieval + tooling** (currently inconsistent defaults: 384 vs 512). + - Why: any code path that calls stub embeddings without an explicit `dims` risks producing query embeddings that cannot match the index dims. + - Files: `src/shared/embedding.js` (defaults to 512), `src/index/embedding.js` (defaults to 384), `tools/build-embeddings/run.js` (defaults to 384), `src/retrieval/embedding.js` (passes `dims`, but can pass null in some ANN-only paths). + - Recommendation: pick **384** as the single default everywhere OR require dims explicitly in stub mode and fail loudly if missing. +- [ ] **Index-build (inline) path lacks explicit dims mismatch failure** comparable to build-embeddings tool: + - `src/index/build/file-processor/embeddings.js` currently coerces unexpected shapes to empty arrays and proceeds. + - Add an explicit “dims contract” check and fail fast (or disable embeddings) if: + - vectors are not arrays/typed arrays, + - dims are inconsistent across chunks, + - batch output length mismatches input length. +- [ ] **Make per-file embedding cache writes atomic** (cache files are written with `fs.writeFile`): + - Why: partial/corrupt cache JSON can cause repeated recompute; while not “poisoning,” it degrades throughput and can mask real failures. + - Files: `tools/build-embeddings/run.js` (cache writes), optionally reuse `tools/build-embeddings/atomic.js` or shared atomic writer. + +**Exit criteria** +- [ ] Changing any embedding-relevant knob (model path/tokenizer/provider/normalization/pooling/quantization) forces cache miss. +- [ ] Dims mismatch fails loudly (or deterministically disables embeddings) in **both** build-embeddings and inline index-build paths. +- [ ] Stub-mode dims are consistent across indexing + retrieval. + +--- + +#### 29.1.2 Determinism (float handling, batching order) + +##### Current state (verified) +- [x] Quantization uses deterministic rounding (`src/index/embedding.js`). +- [x] Batched embedding retains input ordering in both tooling and index build (`tools/build-embeddings/embed.js`, `src/index/build/file-processor/embeddings.js`). + +##### Remaining gaps / action items +- [ ] **Document and/or enforce determinism requirements for HNSW build**: + - HNSW graph structure can vary with insertion order; current insertion order is “file processing order,” which depends on `Map` insertion order derived from chunk meta traversal. + - Files: `tools/build-embeddings/run.js`, `tools/build-embeddings/hnsw.js` + - Recommendation: ensure vectors are added to HNSW in a stable order (e.g., ascending `chunkIndex`). +- [ ] **Avoid nondeterministic file sampling in context window estimation**: + - `src/index/build/context-window.js` uses the first N files in `files[]`; if upstream file enumeration order is OS-dependent, context window results can change. + - Recommendation: sort file paths before sampling (or explicitly document nondeterminism). +- [ ] **Normalize float types across providers**: + - Many paths convert typed arrays into JS arrays; this is deterministic but increases the surface for subtle differences and performance regressions. + - Recommendation: standardize on `Float32Array` where feasible and only convert at serialization boundaries. 
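+
+Of the gaps above, the HNSW insertion-order fix is mechanical. A minimal sketch, assuming an hnswlib-style `addPoint(vector, label)` API (method name illustrative):
+
+```js
+// Insert vectors in ascending chunkIndex order so the HNSW graph layout
+// does not depend on file-processing completion order.
+function addVectorsDeterministically(index, entries) {
+  const ordered = [...entries].sort((a, b) => a.chunkIndex - b.chunkIndex);
+  for (const { chunkIndex, vector } of ordered) {
+    index.addPoint(vector, chunkIndex); // label = chunkIndex
+  }
+}
+```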
+ +**Exit criteria** +- [ ] HNSW build is reproducible across runs given identical artifacts/config (or nondeterminism is clearly documented and accepted). +- [ ] Context window selection is stable given identical repo state. + +--- + +#### 29.1.3 Robust fallback behavior (missing models/extensions/unsupported configs) + +##### Current state (verified) +- [x] Retrieval embedding errors are caught and return `null` (`src/retrieval/embedding.js`), which allows the search pipeline to continue in sparse-only mode. +- [x] SQLite vector extension usage is guarded and can be disabled via sanitization (`tests/vector-extension-sanitize.js`). + +##### Remaining gaps / action items +- [ ] **ONNX embedder config validation is partially ineffective**: + - `src/shared/onnx-embeddings.js:createOnnxEmbedder()` checks `normalizeEmbeddingProvider('onnx') !== 'onnx'` which is a no-op (constant input). + - Replace with validation of the *actual* requested provider (or remove the dead check). +- [ ] **Improve “missing model” errors with clear remediation** (especially for offline envs): + - Recommend: explicitly mention `tools/download-models.js` and where the model path is expected. + - Files: `src/shared/onnx-embeddings.js`, `src/index/embedding.js` +- [ ] **HNSW load path should fall back to `.bak` on corrupt primary**, not only when primary is missing: + - Today: `src/shared/hnsw.js` only chooses `.bak` if primary missing; it does not retry `.bak` if `readIndexSync()` throws. +- [ ] **Use HNSW meta for safety checks**: + - Retrieval load does not read `dense_vectors_hnsw.meta.json`, so it cannot validate `dims`, `space`, or `model` before querying. + - Files: `src/shared/hnsw.js` +- [ ] **Add explicit tests for “extension missing” fallback**: + - Currently there is sanitization coverage, but not “load failure / missing shared library” behavior. + - Files/tests: `tools/build-embeddings/sqlite-dense.js` + new test. + +**Exit criteria** +- [ ] Missing/corrupt HNSW artifacts do not crash retrieval; the system degrades gracefully to another ANN backend or sparse-only. +- [ ] Missing ONNX model artifacts fail with actionable errors (or clean fallback in non-strict modes). + +--- + +### 29.2 Batching & scheduling + +#### 29.2.1 Batch auto-tuning (memory/CPU/repo size) + +##### Current state (verified) +- [x] Both index-build and build-embeddings tooling implement “auto batch” based on `os.totalmem()` (`src/index/build/runtime/embeddings.js`, `tools/build-embeddings/cli.js`). +- [x] Language-specific multipliers exist and are tested (`src/index/build/embedding-batch.js`, `tests/embedding-batch-multipliers.js`). + +##### Remaining gaps / action items +- [ ] **Unify and justify auto-batch heuristics**: + - Index-build uses `totalGb * 16` with min 16. + - build-embeddings tool uses `totalGb * 32` with min 32. + - Decide a single policy OR clearly document why they intentionally differ. +- [ ] **Incorporate CPU oversubscription controls**: + - ONNX runtime can be multi-threaded (`threads` option), while the embedding queue can also be concurrent. + - Add a policy: e.g., `embeddingConcurrency * onnxThreads <= cpuCount` (or document exceptions). + - Files: `src/index/build/runtime/embeddings.js`, `src/shared/onnx-embeddings.js` +- [ ] **Adapt batch sizing to repo characteristics**: + - For tiny repos/files, large batch sizes increase latency without improving throughput. + - For huge repos, file-by-file batching underutilizes the accelerator (many small batches). 
+ - Recommendation: introduce a global “embedding batcher” that batches across files with: + - max batch size, + - max tokens/estimated memory per batch, + - stable ordering. + - Files impacted: `src/index/build/file-processor/embeddings.js`, `tools/build-embeddings/run.js` + +**Exit criteria** +- [ ] Batch sizing + concurrency are predictable and safe across low-memory hosts, multi-core hosts, and both small and large repos. +- [ ] Default settings do not oversubscribe CPU when ONNX threads are enabled. + +--- + +#### 29.2.2 Embedding queues (backpressure, bounded memory) + +##### Current state (verified) +- [x] Service-mode job enqueue provides a `maxQueued` hook (`src/index/build/indexer/embedding-queue.js`). + +##### Remaining gaps / action items +- [ ] **Define and enforce backpressure defaults**: + - If `maxQueued` is unset/null, behavior depends on `enqueueJob()` (not in scope here); ensure a safe default exists. + - Add explicit documentation + a test that verifies queue growth is bounded. +- [ ] **Ensure service jobs include enough identity to be safe**: + - Job payload includes `{repo, mode}`, but not an embedding identity fingerprint. + - Include `embeddingProvider`, model id, and/or a hash of embedding config to prevent mismatched worker configuration from producing incompatible embeddings. + +**Exit criteria** +- [ ] Queue growth is bounded by default; overload produces clear errors and does not OOM the process. + +--- + +#### 29.2.3 Session/model reuse + +##### Current state (verified) +- [x] ONNX sessions are cached per normalized config (`src/shared/onnx-embeddings.js`). +- [x] Retrieval embedder instances are cached in-process (`src/retrieval/embedding.js`). + +##### Remaining gaps / action items +- [ ] **Guard concurrent use of shared ONNX sessions if required**: + - If `onnxruntime-node` sessions are not safe for concurrent `run()` calls, add a per-session mutex/queue. + - At minimum: document thread-safety assumptions and add a stress test. +- [ ] **Avoid duplicate pipeline/session loads in index-build**: + - `src/index/embedding.js` does not maintain a global cache similar to retrieval; if multiple embedder instances are constructed in one process, models may be loaded multiple times. + +**Exit criteria** +- [ ] A single model/session is loaded once per process per config, and safely shared across all embedding calls. + +--- + +### 29.3 ANN correctness + +#### 29.3.1 Distance metric correctness (HNSW scoring) + +##### Current state (verified) +- [x] HNSW ranker applies a stable tie-break (`idx`) after converting distances to similarity (`src/shared/hnsw.js`). + +##### Remaining gaps / action items +- [ ] **Confirm and test distance-to-similarity conversion for each HNSW space** (`l2`, `cosine`, `ip`): + - Current code treats `ip` the same as `cosine` (`sim = 1 - distance`). + - This may be correct or incorrect depending on hnswlib’s distance definition for `ip`. + - Required: add unit tests with known vectors and expected distances/similarities and adjust conversion if needed. + - Files: `src/shared/hnsw.js`, new test (e.g., `tests/hnsw-distance-metrics.js`). + +**Exit criteria** +- [ ] For each supported space, returned `sim` is monotonic with the true similarity notion used elsewhere in scoring. + +--- + +#### 29.3.2 Atomic safety (no torn reads/writes) + +##### Current state (verified) +- [x] Build writes HNSW `.bin` and `.meta.json` via atomic replace with `.bak` retention (`tools/build-embeddings/atomic.js`, `tools/build-embeddings/hnsw.js`). 
+- [x] There is a test that asserts `.bak` is created on replace (`tests/hnsw-atomic.js`). + +##### Remaining gaps / action items +- [ ] **HNSW reader should support “corrupt primary” fallback**: + - Implement: try primary, and if read fails, try `.bak` before giving up. + - Files: `src/shared/hnsw.js` +- [ ] **Validate `.bin` / `.meta.json` pairing**: + - Ensure meta file exists, parseable, and matches expected dims/space/model before using the index. + - If mismatch, treat index as unavailable and fall back. + +**Exit criteria** +- [ ] Retrieval never crashes due to a torn/corrupt HNSW file; fallback paths are exercised by tests. + +--- + +#### 29.3.3 Candidate set semantics (HNSW + sqlite-vec) + +##### Current state (verified) +- [x] SQLite candidate pushdown behavior is tested for small vs large candidate sets (`tests/sqlite-vec-candidate-set.js`). + +##### Remaining gaps / action items +- [ ] **Handle empty candidate sets explicitly in HNSW path**: + - `rankHnswIndex()` currently treats an empty set as “no filter” (because `candidateSet.size` is falsy), which can return results when none are desired. + - Files: `src/shared/hnsw.js` +- [ ] **Document and test candidate-set cap behavior**: + - HNSW uses a `candidateSetCap` default of 1000; ensure callers understand whether this can truncate results. + - Add tests for: + - empty set → empty hits, + - small set → only those labels, + - very large set → filter still applied and returned hits are subset, with stable ordering. +- [ ] **Align candidate-set tie-break behavior across backends**: + - SQLite ANN tests require deterministic tie-break by `rowid`. + - HNSW already tie-breaks by `idx`. Ensure both are consistent with retrieval expectations. + +**Exit criteria** +- [ ] Candidate sets behave identically (semantically) across ANN backends: never return items outside the set, deterministic ordering for ties, predictable truncation rules. + +--- + +### 29.4 Performance improvements to prioritize + +#### 29.4.1 Float32Array end-to-end (avoid JS arrays of floats) +- [ ] **Standardize the embedding contract to return `Float32Array`**: + - Files: `src/index/embedding.js`, `src/retrieval/embedding.js`, `src/shared/onnx-embeddings.js`, `src/shared/embedding.js` +- [ ] **Update downstream code to accept typed arrays** (don’t gate on `Array.isArray`): + - Files: `src/index/build/file-processor/embeddings.js`, `tools/build-embeddings/embed.js`, `tools/build-embeddings/run.js`, `tools/build-embeddings/hnsw.js` +- [ ] **Defer conversion to JS arrays only at serialization boundaries** (JSON writing). + +#### 29.4.2 Minimize serialization between threads/processes (transferable buffers) +- [ ] Where embeddings are computed in worker threads/processes (service mode), prefer: + - transferring `ArrayBuffer`/`SharedArrayBuffer` instead of JSON arrays, + - or using binary packed formats for vectors. +- [ ] Add an explicit “embedding payload format” version in job payloads so workers and callers stay compatible. + - File touchpoints: `src/index/build/indexer/embedding-queue.js` (job payload) + +#### 29.4.3 Pre-allocate and reuse buffers +- [ ] **ONNX embedding path**: + - Avoid per-call allocations: + - re-use `BigInt64Array` buffers for token ids/masks where shapes are stable, + - avoid `Array.from()` conversions for slices. + - Files: `src/shared/onnx-embeddings.js` +- [ ] **Index-build merge path**: + - Avoid allocating a new zero vector per chunk in `attachEmbeddings()`. 
+ - File: `src/index/build/file-processor/embeddings.js` + +#### 29.4.4 Candidate generation tuning +- [ ] Push sparse filters earlier and reduce dense scoring work: + - prefer ANN-restricted candidate sets before dense dot products, + - prefer pushing candidate constraints into sqlite-vec queries when small enough (already partially implemented). + - (Some of this lives outside the reviewed file list; track as cross-cutting work.) + +**Exit criteria** +- [ ] Embedding pipelines avoid unnecessary conversions/allocations; measurable CPU and memory reductions on large repos. +- [ ] ANN candidate generation demonstrably reduces dense scoring load for common queries. + +--- + +### 29.5 Refactoring goals + +#### 29.5.1 Single embedding interface shared by build + retrieval +- [ ] Create a single shared adapter interface, e.g.: + - `embed(texts: string[], opts) => Float32Array[]` + - `embedOne(text: string, opts) => Float32Array` +- [ ] Move provider selection + error handling behind adapters: + - `xenova`, `onnx`, `stub`. +- [ ] Ensure both index-build and retrieval use the same adapter and the same preprocessing defaults. + +#### 29.5.2 Centralize normalization & preprocessing +- [ ] Eliminate duplicated `normalizeVec()` implementations: + - `src/index/embedding.js` + - `src/shared/onnx-embeddings.js` + - `tools/build-embeddings/embed.js` (indirectly uses index/embedding normalization) +- [ ] Centralize: + - pooling strategy, + - normalization strategy, + - truncation/max_length policy, + - doc/code merge policy. + +#### 29.5.3 Clear ANN backend adapters +- [ ] Wrap sqlite-vec and HNSW behind a single “ANN adapter” contract with: + - candidate set semantics, + - deterministic tie-break contract, + - consistent error handling and stats reporting. + - (Some of this lives outside the reviewed file list.) + +**Exit criteria** +- [ ] Build + retrieval cannot diverge in embedding shape/normalization/pooling without a deliberate, versioned change. +- [ ] ANN behavior is consistent regardless of backend. + +--- + +### 29.6 Tests + +#### 29.6.1 Coverage checklist + +##### Already covered (verified) +- [x] Cache identity/invalidation (baseline) — `tests/embeddings-cache-identity.js`, `tests/embeddings-cache-invalidation.js` +- [x] Dims mismatch (tooling) — `tests/embeddings-dims-mismatch.js`, `tests/embeddings-dims-validation.js` +- [x] ANN candidate set correctness (sqlite-vec) — `tests/sqlite-vec-candidate-set.js` +- [x] HNSW artifacts existence + atomic replace — `tests/hnsw-ann.js`, `tests/hnsw-atomic.js` + +##### Missing / needs additions +- [ ] **Cache identity tests must cover provider-specific knobs**, especially ONNX config: + - Add tests proving that changing `onnx.tokenizerId` or `onnx.modelPath` changes identityKey and forces cache miss. +- [ ] **Add extension missing/fallback tests**: + - Simulate vector extension load failure and ensure build/search does not crash and disables vector ANN. +- [ ] **Add HNSW candidate set tests**: + - empty set returns empty hits, + - filter does not leak labels, + - tie-break stability. +- [ ] **Add HNSW `.bak` fallback tests**: + - corrupt primary index/meta triggers `.bak` load and does not crash. +- [ ] **Add performance regression test for embedding batching throughput** (required by checklist): + - Recommended approach (stable in CI): + - Use a synthetic embedder function with a fixed per-call overhead + per-item cost. + - Assert that `runBatched()` with batchSize>1 achieves >= X% speedup vs batchSize=1 on a fixed input size. 
+ - Use generous thresholds to avoid flakiness; focus on catching *major* regressions (e.g., accidental O(n²) behavior or disabling batching). + - Candidate target: `tools/build-embeddings/embed.js:runBatched()` and/or `src/index/build/file-processor/embeddings.js` batching path. + +**Exit criteria** +- [ ] Tests fail if embedding identity changes are not reflected in cache keys. +- [ ] Tests cover ANN candidate set semantics for both sqlite-vec and HNSW. +- [ ] At least one performance regression test exists for batching throughput. + +--- + +### Appendix A — File-by-file review notes (actionable items) + +> The checklist items above are the canonical “what to fix.” This appendix maps concrete file-level changes back to those items. + +#### src + +##### `src/index/build/context-window.js` +- [ ] Sort/sanitize file list before sampling to reduce OS-dependent nondeterminism. +- [ ] Consider documenting that context-window estimation is heuristic and may vary with sampling strategy. + +##### `src/index/build/embedding-batch.js` +- [ ] Consider parsing `baseSize` if it may come from config as a numeric string. +- [ ] Add explicit documentation for multiplier precedence (fallback vs user config). + +##### `src/index/build/file-processor/embeddings.js` +- [ ] Add dims contract validation (non-empty vectors must share dims; fail fast otherwise). +- [ ] Support `Float32Array` outputs (don’t rely on `Array.isArray`). +- [ ] Avoid allocating `new Array(dims).fill(0)` per chunk; reuse a single `zeroVec`. +- [ ] Validate that `getChunkEmbeddings(texts).length === texts.length`; if not, log + fail or retry with a clear warning. +- [ ] Ensure doc embedding results are length-aligned with `docPayloads` (currently assumes perfect alignment). + +##### `src/index/build/indexer/embedding-queue.js` +- [ ] Include embedding identity/config hash in job payload to prevent mismatched worker behavior. +- [ ] Consider switching job IDs to `crypto.randomUUID()` for collision resistance. +- [ ] Ensure `maxQueued` has a safe default; document backpressure behavior. + +##### `src/index/build/runtime/embeddings.js` +- [ ] Reconcile auto-batch policy with tooling (`tools/build-embeddings/cli.js`). +- [ ] Consider incorporating ONNX thread settings into concurrency auto-tune to avoid oversubscription. + +##### `src/index/embedding.js` +- [ ] Centralize `normalizeVec`/`quantizeVec` into shared utilities; remove duplication. +- [ ] Add strict provider validation (unknown provider should error/warn). +- [ ] Harden `normalizeBatchOutput()` to: + - guarantee output length equals input count, + - handle unexpected tensor dims more defensively, + - avoid returning a single huge vector when output is 3D. +- [ ] Prefer returning `Float32Array` (or at least accept typed arrays downstream). + +##### `src/retrieval/embedding.js` +- [ ] Use a normalized/fingerprinted ONNX config in the embedder cache key (avoid JSON-order sensitivity). +- [ ] If retrieval can request embeddings without known dims (ANN-only paths), require dims or ensure consistent default dims. +- [ ] Consider logging embedder load failures once (rate-limited) to aid debugging. + +##### `src/shared/embedding.js` +- [ ] Unify stub default dims with the rest of the system (recommend 384). +- [ ] Optionally return `Float32Array` to match the desired end-to-end contract. + +##### `src/shared/hnsw.js` +- [ ] Implement `.bak` fallback when the primary index exists but is corrupt/unreadable. 
+- [ ] Read/validate `dense_vectors_hnsw.meta.json` to confirm `dims/space/model` before using the index. +- [ ] Handle empty candidate sets explicitly by returning `[]`. +- [ ] Add unit tests for distance conversion across spaces (l2/cosine/ip) and adjust similarity conversion if required. + +##### `src/shared/onnx-embeddings.js` +- [ ] Remove/fix dead provider check (`normalizeEmbeddingProvider('onnx')`). +- [ ] Add clearer error messaging for missing model artifacts + remediation steps. +- [ ] Improve performance by avoiding heavy array conversions and by reusing buffers/tensors. +- [ ] Consider concurrency guards around `session.run()` if onnxruntime sessions are not safe concurrently. + +--- + +#### tools + +##### `tools/build-embeddings.js` +- No issues observed beyond those in underlying implementation modules. + +##### `tools/build-embeddings/atomic.js` +- [ ] Consider consolidating atomic replace logic with `src/shared/json-stream.js` to avoid divergence (optional refactor). + +##### `tools/build-embeddings/cache.js` +- [ ] Expand identity schema to include preprocessing and provider-specific config (especially ONNX knobs). +- [ ] Add a bumpable “identity version” or build-tool version fingerprint. + +##### `tools/build-embeddings/chunks.js` +- [ ] Consider incorporating doc-related signals into the chunk signature (or into identity versioning) so doc embedding caches invalidate when doc extraction logic changes. +- [ ] Consider normalizing `start/end` to finite numbers before signature generation (avoid stringifying `undefined`). + +##### `tools/build-embeddings/cli.js` +- [ ] Document (or change) the behavior where `mode=service` is coerced to `inline` for this tool. +- [ ] Unify auto-batch defaults with index-build runtime (or document why they differ). + +##### `tools/build-embeddings/embed.js` +- [ ] Update to accept and return typed arrays (`Float32Array`) instead of insisting on JS arrays. +- [ ] Consider failing fast on non-vector outputs instead of silently returning `[]` entries (to avoid quietly producing all-zero embeddings). + +##### `tools/build-embeddings/hnsw.js` +- [ ] Ensure stable vector insertion order into HNSW (ascending chunkIndex). +- [ ] When adding vectors reconstructed from cache (dequantized), consider re-normalizing for cosine space to reduce drift. + +##### `tools/build-embeddings/manifest.js` +- [ ] Consider reading HNSW meta to report accurate `count`/`dims` for ANN piece files, rather than relying on `totalChunks` (defensive correctness). + +##### `tools/build-embeddings/run.js` +- [ ] Make cache writes atomic (optional but recommended). +- [ ] Use `Number.isFinite()` for chunk start/end to avoid 0/NaN edge cases from `||` coercion. +- [ ] Apply `ensureVectorArrays()` to embedded doc batches just like code batches. +- [ ] Make HNSW build deterministic (stable insertion order). +- [ ] Consider adding a global cross-file batcher for throughput. + +##### `tools/build-embeddings/sqlite-dense.js` +- [ ] Add tests for “vector extension missing/failed to load” fallback behavior. +- [ ] Consider batching inserts in larger chunks or using prepared statements more aggressively for performance on large vector sets. + +##### `tools/compare-models.js` +- [ ] If comparing ONNX vs xenova providers, ensure the script can capture and report provider config differences (identity) to interpret deltas correctly (minor enhancement). 
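+
+For the `tools/build-embeddings/run.js` atomic-write item above, the standard temp-file-plus-rename pattern suffices. A sketch (not the existing `tools/build-embeddings/atomic.js` helper; file naming is illustrative):
+
+```js
+const fs = require('node:fs/promises');
+const path = require('node:path');
+
+// Write to a temp file in the same directory, then rename over the target.
+// rename() is atomic on POSIX filesystems, so readers never observe a
+// partially written cache entry.
+async function writeCacheAtomic(filePath, payload) {
+  const tmp = path.join(
+    path.dirname(filePath),
+    `.${path.basename(filePath)}.${process.pid}.tmp`
+  );
+  await fs.writeFile(tmp, JSON.stringify(payload));
+  await fs.rename(tmp, filePath);
+}
+```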
+ +##### `tools/download-models.js` +- [ ] Consider supporting explicit download of ONNX model artifacts when users rely on `indexing.embeddings.provider=onnx` and custom `onnx.modelPath`. +- [ ] Improve output to show where models were cached and what to set in config if needed. + +--- + +#### tests + +##### `tests/build-embeddings-cache.js` +- [ ] Extend to assert cache identity changes for ONNX config changes (once identity schema is expanded). + +##### `tests/embedding-batch-autotune.js` +- [ ] Consider loosening or documenting assumptions about minimum batch size on low-memory systems (or adjust runtime min to match test expectations). + +##### `tests/embedding-batch-multipliers.js` +- No issues; good coverage of multiplier normalization. + +##### `tests/embeddings-cache-identity.js` +- [ ] Extend to cover ONNX-specific identity fields (tokenizerId/modelPath/etc). + +##### `tests/embeddings-cache-invalidation.js` +- [ ] Add invalidation scenarios tied to preprocessing knobs (pooling/normalize/max_length) once surfaced in identity. + +##### `tests/embeddings-dims-mismatch.js` +- Good. + +##### `tests/embeddings-dims-validation.js` +- Good. + +##### `tests/embeddings-sqlite-dense.js` +- [ ] Add coverage for vector extension load failure paths (extension missing), not only baseline dense sqlite insertions. + +##### `tests/embeddings-validate.js` +- Good baseline index-state + artifact validation coverage. + +##### `tests/hnsw-ann.js` +- [ ] Add correctness assertions beyond “backend selected”: + - candidate set filtering (once exposed), + - tie-break determinism, + - sanity check of returned ordering for a known query on fixture corpus. + +##### `tests/hnsw-atomic.js` +- [ ] Add test for `.bak` fallback on corrupt primary index/meta (reader-side). + +##### `tests/smoke-embeddings.js` +- Good smoke harness; consider adding new tests to this suite after implementing performance regression and fallback tests. + +##### `tests/sqlite-vec-candidate-set.js` +- [ ] Add a column-name sanitization test (table is covered; column is not). + +##### `tests/vector-extension-sanitize.js` +- Good table sanitization coverage; extend for column sanitization as above. + +--- + + +## Phase 30 — Index analysis features (metadata/risk/git/type-inference) — Review findings & remediation checklist + +**Objective:** Review the Section 4 file set (56 files) and produce a concrete, exhaustive remediation checklist that (1) satisfies the provided Phase 4 checklist (A–G) and (2) captures additional defects, inconsistencies, and improvements found during review. + +**Scope:** All files enumerated in `pairofcleats_review_section_4_files_and_checklist.md` (src/tests/docs). +**Out of scope:** Implementing fixes in-code (this document is a work plan / punch list). + +--- + +### Summary (priority ordered) + +#### P0 — Must fix (correctness / crash / schema integrity) + +- [ ] **Risk rules regex compilation is currently mis-wired.** `src/index/risk-rules.js` calls `createSafeRegex()` with an incorrect argument signature, so rule regex configuration (flags, limits) is not applied, and invalid patterns can throw and abort normalization. + - Fix in: `src/index/risk-rules.js` (see §B.1). +- [ ] **Risk analysis can crash indexing on long lines.** `src/index/risk.js` calls SafeRegex `test()` / `exec()` without guarding against SafeRegex input-length exceptions. One long line can throw and fail the whole analysis pass. + - Fix in: `src/index/risk.js` (see §B.2). 
+- [ ] **Metadata v2 drops inferred/tooling parameter types (schema data loss).** `src/index/metadata-v2.js` normalizes type maps assuming values are arrays; nested maps (e.g., `inferredTypes.params.<param>[]`) are silently discarded.
+  - Fix in: `src/index/metadata-v2.js` + tests + schema/docs (see §A.1–A.4).
+
+#### P1 — Should fix (determinism, performance, docs, validation gaps)
+
+- [ ] **`metaV2` validation is far too shallow and does not reflect the actual schema shape.** `src/index/validate.js` only validates a tiny subset of fields and does not traverse nested type maps.
+- [ ] **Docs drift:** `docs/metadata-schema-v2.md` and `docs/risk-rules.md` do not fully match current code (field names, structures, and configuration).
+- [ ] **Performance risks:** risk scanning does redundant passes and does not short-circuit meaningfully when capped; markdown parsing is duplicated (inline + fenced); tooling providers re-read files rather than reusing already-loaded text.
+
+#### P2 — Nice to have (quality, maintainability, test depth)
+
+- [ ] Improve signature parsing robustness for complex types (C-like, Python, Swift).
+- [ ] Clarify and standardize naming conventions (chunk naming vs provider symbol naming, “generatedBy”, “embedded” semantics).
+- [ ] Expand tests to cover surrogate pairs (emoji), CRLF offsets, and risk rules/config edge cases.
+
+---
+
+### A) Metadata v2: correctness, determinism, and validation
+
+#### Dependency guidance (best choices)
+- `ajv` — encode **metadata-schema-v2** as JSON Schema and validate `metaV2` as a hard gate in `tools/index-validate` (or equivalent).
+- `semver` — version `metaV2.schemaVersion` independently and gate readers/writers.
+
+#### A.1 `metaV2.types` loses nested inferred/tooling param types (P0)
+
+##### Affected files
+- `src/index/metadata-v2.js`
+- `docs/metadata-schema-v2.md`
+- `src/index/validate.js`
+- `tests/metadata-v2.js`
+
+##### Findings
+- [ ] **Data loss bug:** `normalizeTypeMap()` assumes `raw[key]` is an array of entries. If `raw[key]` is an object map (e.g., `raw.params` where `raw.params.<param>` is an array), it is treated as non-array and dropped.
+  - Evidence: `normalizeTypeMap()` (lines ~78–91) only normalizes `Array.isArray(entries)` shapes.
+- [ ] **Downstream effect:** `splitToolingTypes()` is applied to `docmeta.inferredTypes`; because nested shapes are not handled, **tooling-derived param types will not appear in `metaV2.types.tooling.params`**, and inferred param types will be absent from `metaV2.types.inferred.params`.
+
+##### Required remediation
+- [ ] Update `normalizeTypeMap()` to support nested “param maps” (and any similar nested structures) rather than dropping them. A pragmatic approach:
+  - [ ] If `entries` is an array → normalize as today.
+  - [ ] If `entries` is an object → treat it as a nested map and normalize each subkey:
+    - preserve the nested object shape in output (preferred), or
+    - flatten with a predictable prefix strategy (only if schema explicitly adopts that).
+- [ ] Update `splitToolingTypes()` so it correctly separates tooling vs non-tooling entries **inside nested maps** (e.g., `params.<param>[]`, `locals.<local>[]`).
+- [ ] Update `tests/metadata-v2.js` to assert:
+  - [ ] inferred param types survive into `metaV2.types.inferred.params.<name>[]`
+  - [ ] tooling param types survive into `metaV2.types.tooling.params.<name>[]`
+  - [ ] non-tooling inferred types do not leak into the tooling bucket (and vice versa)
+
+#### A.2 Declared types coverage is incomplete (P1)
+
+##### Findings
+- [ ] `buildDeclaredTypes()` currently only materializes:
+  - param annotations via `docmeta.paramTypes`
+  - return annotation via `docmeta.returnType`
+  It does **not** cover:
+  - [ ] parameter defaults (`docmeta.paramDefaults`)
+  - [ ] local types (`docmeta.localTypes`)
+  - [ ] any other declared type sources the codebase may already emit
+
+##### Required remediation
+- [ ] Decide which “declared” facets are part of the Metadata v2 contract and implement them consistently (and document them):
+  - [ ] `declared.defaults` (if desired)
+  - [ ] `declared.locals` (if desired)
+- [ ] Update `docs/metadata-schema-v2.md` accordingly.
+- [ ] Add tests in `tests/metadata-v2.js` for any newly included declared facets.
+
+#### A.3 Determinism and stable ordering in `metaV2` (P1)
+
+##### Findings
+- [ ] Several arrays are produced via Set insertion order (e.g., `annotations`, `params`, `risk.tags`, `risk.categories`). While *often* stable, they can drift if upstream traversal order changes.
+- [ ] `metaV2` mixes optional `null` vs empty collections inconsistently across fields (some fields null, others empty arrays). This matters for artifact diffs and schema validation.
+
+##### Required remediation
+- [ ] Standardize ordering rules for arrays that are semantically sets:
+  - [ ] Sort `annotations` (lexicographic) before emitting.
+  - [ ] Sort `params` (lexicographic) before emitting.
+  - [ ] Sort risk `tags`/`categories` (lexicographic) before emitting.
+- [ ] Establish a consistent “empty means null” vs “empty means []” policy for v2 and enforce it in `buildMetaV2()` and schema/docs.
+
+#### A.4 `generatedBy` and `embedded` semantics are unclear (P2)
+
+##### Findings
+- [ ] `generatedBy` currently uses `toolInfo?.version` only; if `tooling` already contains `tool` and `version`, this can be redundant and underspecified.
+- [ ] `embedded` is emitted whenever `chunk.segment` exists, even when the segment is not embedded (parentSegmentId may be null). This makes the field name misleading.
+
+##### Required remediation
+- [ ] Decide and document the intended meaning:
+  - [ ] Option A: `generatedBy = "<tool>@<version>"` and keep `tooling` for structured detail.
+  - [ ] Option B: remove `generatedBy` and rely solely on `tooling`.
+- [ ] Restrict the `embedded` field to truly-embedded segments only **or** rename the field to something like `segmentContext` / `embedding`.
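+
+As a concrete reference for the §A.1 fix, here is a minimal sketch of nested-aware normalization; the entry shapes and the `normalizeEntry` helper are assumptions for illustration, not the project’s actual API:
+
+```js
+// Sketch: arrays normalize as today; plain objects are treated as nested maps
+// (e.g. raw.params.<name> -> entry[]) and normalized per subkey instead of
+// being silently dropped.
+function normalizeTypeMap(raw, normalizeEntry) {
+  const out = {};
+  for (const [key, entries] of Object.entries(raw ?? {})) {
+    if (Array.isArray(entries)) {
+      out[key] = entries.map(normalizeEntry).filter(Boolean);
+    } else if (entries && typeof entries === 'object') {
+      const nested = normalizeTypeMap(entries, normalizeEntry); // preserve shape
+      if (Object.keys(nested).length > 0) out[key] = nested;
+    }
+    // Anything else (null, strings, numbers) stays dropped, as before.
+  }
+  return out;
+}
+```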
+
+#### A.5 Validation gaps for Metadata v2 (P1)
+
+##### Findings (in `src/index/validate.js`)
+- [ ] `validateMetaV2()` (lines ~162–206) validates only:
+  - `chunkId` presence
+  - `file` presence
+  - `risk.flows` has `source` and `sink`
+  - type entries have `.type` (checked via a shallow, array-only traversal)
+  It does **not** validate:
+  - [ ] `segment` object shape
+  - [ ] range/start/end types and ordering invariants
+  - [ ] `lang`, `ext`, `kind`, `name` constraints
+  - [ ] nested types map shapes (params/locals)
+  - [ ] `generatedBy`/`tooling` shape and required fields
+  - [ ] cross-field invariants (e.g., range within segment, embedded context consistency)
+
+##### Required remediation
+- [ ] Establish **one canonical validator** for `metaV2` (preferably schema-based):
+  - [ ] Add an explicit JSON Schema for v2 (in docs or tooling directory).
+  - [ ] Validate `metaV2` against the schema in `validateIndexArtifacts()`.
+- [ ] If schema-based validation is not yet possible, expand `validateMetaV2()` to:
+  - [ ] traverse nested `params`/`locals` maps for type entries
+  - [ ] validate `range` numbers, monotonicity, and non-negativity
+  - [ ] validate the presence/type of stable core fields as defined in `docs/metadata-schema-v2.md`
+- [ ] Add tests (or fixtures) that exercise validation failures for each major failure class.
+
+#### A.6 Docs drift: `docs/metadata-schema-v2.md` vs implementation (P1)
+
+##### Findings
+- [ ] The schema doc should be reviewed line-by-line against current `buildMetaV2()` output:
+  - field names
+  - optionality
+  - nesting of `types.*`
+  - risk shapes and analysisStatus shape
+  - relations link formats
+
+##### Required remediation
+- [ ] Update `docs/metadata-schema-v2.md` to reflect the actual emitted shape **or** update `buildMetaV2()` to match the doc (pick one, do not leave them divergent).
+- [ ] Add a “schema change log” section so future modifications don’t silently drift.
+
+---
+
+### B) Risk rules and risk analysis
+
+#### Dependency guidance (best choices)
+- `re2`/RE2-based engine (already present via `re2js`) — keep for ReDoS safety, but ensure wrapper behavior cannot crash indexing.
+- `ajv` — validate the rule bundle format (ids, patterns, severities, categories, etc.) before compiling.
+
+#### B.1 Risk regex compilation is broken (P0)
+
+##### Affected file
+- `src/index/risk-rules.js`
+
+##### Findings
+- [ ] **Incorrect call signature:** `compilePattern()` calls `createSafeRegex(pattern, flags, regexConfig)` but `createSafeRegex()` accepts `(pattern, config)` (per `src/shared/safe-regex.js`).
+  Consequences:
+  - `regexConfig` is ignored entirely
+  - the intended default flags (`i`) are not applied
+  - any user-configured safe-regex limits are not applied
+- [ ] **No error shielding:** `compilePattern()` does not catch regex compilation errors. An invalid pattern can throw and abort normalization.
+
+##### Required remediation
+- [ ] Fix `compilePattern()` to call `createSafeRegex(pattern, safeRegexConfig)` (or a merged config object).
+- [ ] Wrap compilation in `try/catch` and return `null` on failure (or record a validation error) so rule bundles cannot crash indexing.
+- [ ] Add tests that verify:
+  - [ ] configured flags (e.g., `i`) actually take effect
+  - [ ] invalid patterns do not crash normalization and are surfaced as actionable diagnostics
+  - [ ] configured `maxInputLength` and other safety controls are honored
+
+#### B.2 Risk analysis can crash on long inputs (P0)
+
+##### Affected file
+- `src/index/risk.js`
+
+##### Findings
+- [ ] `matchRuleOnLine()` calls SafeRegex `test()` and `exec()` without guarding against exceptions thrown by SafeRegex input validation (e.g., when line length exceeds `maxInputLength`).
+  - This is a hard failure mode: one long line can abort analysis for the entire file (or build, depending on call site error handling).
+
+##### Required remediation
+- [ ] Ensure **risk analysis never throws** due to regex evaluation. Options:
+  - [ ] Add `try/catch` around `rule.requires.test(...)`, `rule.excludes.test(...)`, and `pattern.exec(...)` to treat failures as “no match”.
+  - [ ] Alternatively (or additionally), change the SafeRegex wrapper to return `false/null` instead of throwing for overlong input.
+  - [ ] Add a deterministic “line too long” cap behavior:
+    - skip risk evaluation for that line
+    - optionally record `maxLineLength` (or similar) in `analysisStatus.exceeded`
+
+#### B.3 `scope` and cap semantics need tightening (P1)
+
+##### Findings
+- [ ] `scope === 'file'` currently evaluates only `lineIdx === 0` (first line). This is likely not the intended meaning of “file scope”.
+- [ ] `maxMatchesPerFile` currently caps the **number of matching lines**, not the number of matches (the variable name implies a match-count cap).
+
+##### Required remediation
+- [ ] Define (in docs + code) what `scope: "file"` means:
+  - [ ] “pattern evaluated against entire file text” (recommended), or
+  - [ ] “pattern evaluated once per file via a representative subset”
+- [ ] Implement `maxMatchesPerFile` as an actual match-count cap (or rename it to `maxMatchingLines`).
+- [ ] Add tests for both behaviors.
+
+#### B.4 Performance: redundant scanning and weak short-circuiting (P1)
+
+##### Findings
+- [ ] Risk analysis scans the same text repeatedly (sources, sinks, sanitizers are scanned in separate loops).
+- [ ] When caps are exceeded (bytes/lines), flows are skipped, but line scanning for matches still proceeds across the entire file, which defeats the purpose of caps for large/minified files.
+
+##### Required remediation
+- [ ] Add an early-exit path when `maxBytes`/`maxLines` caps are exceeded:
+  - either skip all analysis and return `analysisStatus: capped`
+  - or scan only a bounded prefix/suffix and clearly mark that results are partial
+- [ ] Consider a single-pass scanner per line that evaluates all rule categories in one traversal.
+- [ ] Add a prefilter stage for candidate files/lines (cheap substring checks) before SafeRegex evaluation.
+
+#### B.5 Actionability and determinism of outputs (P1)
+
+##### Findings
+- [ ] `dedupeMatches()` collapses evidence to one match per rule id (may not be sufficient for remediation).
+- [ ] Time-based caps (`maxMs`) can introduce nondeterminism across machines/runs (what gets included depends on wall clock).
+
+##### Required remediation
+- [ ] Preserve up to N distinct match locations per rule (configurable) rather than only the first hit.
+- [ ] Prefer deterministic caps (maxBytes/maxLines/maxNodes/maxEdges) over time caps; if `maxMs` remains, ensure it cannot cause nondeterministic partial outputs without clearly indicating partiality.
+- [ ] Sort emitted matches/flows deterministically (by line/col, rule id) before output. + +#### B.6 Docs drift: `docs/risk-rules.md` vs implementation (P1) + +##### Findings +- [ ] `docs/risk-rules.md` should be updated to reflect: + - actual rule bundle fields supported (`requires`, `excludes`, `scope`, `maxMatchesPerLine`, `maxMatchesPerFile`, etc.) + - actual emitted `risk.analysisStatus` shape (object vs string) + - actual matching semantics (line-based vs file-based) + +##### Required remediation +- [ ] Update the doc to match current behavior (or update code to match doc), then add tests that lock it in. + +--- + +### C) Git signals (metadata + blame-derived authorship) + +#### Dependency guidance (best choices) +- `simple-git` (already used) — ensure it’s called in a way that scales: batching where feasible, caching aggressively, and defaulting expensive paths off unless explicitly enabled. + +#### C.1 Default blame behavior and cost control (P1) + +##### Affected file +- `src/index/git.js` + +##### Findings +- [ ] `blameEnabled` defaults to **true** (`options.blame !== false`). If a caller forgets to pass `blame:false`, indexing will run `git blame` per file (very expensive). +- [ ] `git log` + `git log --numstat` are executed per file; caching helps within a run but does not avoid the O(files) subprocess cost. + +##### Required remediation +- [ ] Make blame opt-in by default: + - [ ] change default to `options.blame === true`, **or** + - [ ] ensure all call sites pass `blame:false` unless explicitly requested via config +- [ ] Consider adding a global “gitSignalsPolicy” (or reuse existing policy object) that centrally controls: + - blame on/off + - churn computation on/off + - commit log depth +- [ ] Performance optimization options (choose based on ROI): + - [ ] batch `git log` queries when indexing many files (e.g., per repo, not per file) + - [ ] compute churn only when needed for ranking/filtering + - [ ] support “recent churn only” explicitly in docs (currently it’s “last 10 commits”) + +#### C.2 Minor correctness and maintainability issues (P2) + +##### Findings +- [ ] Misleading JSDoc: `parseLineAuthors()` is documented as “Compute churn from git numstat output” (it parses blame authors, not churn). This can mislead future maintenance. + +##### Required remediation +- [ ] Fix the JSDoc to match the function purpose and parameter type. + +#### C.3 Tests improvements (P1) + +##### Affected tests +- `tests/git-blame-range.js` +- `tests/git-meta.js` +- `tests/churn-filter.js` +- `tests/git-hooks.js` + +##### Findings +- [ ] No tests assert “blame is off by default” (or the intended default policy). +- [ ] No tests cover rename-following semantics (`--follow`) or untracked files. +- [ ] Caching behavior is not validated (e.g., “git blame called once per file even if many chunks”). + +##### Required remediation +- [ ] Add tests that explicitly validate the intended default blame policy. +- [ ] Add a caching-focused test that ensures repeated `getGitMeta()` calls for the same file do not spawn repeated git commands (can be validated via mocking or by instrumenting wrapper counts). +- [ ] Decide whether rename-following is required and add tests if so. + +--- + +### D) Type inference (local + cross-file + tooling providers) + +#### Dependency guidance (best choices) +- LSP-based providers (clangd/sourcekit/pyright) — keep optional and guarded; correctness should degrade gracefully. 
+- TypeScript compiler API — keep optional and isolated; add caching/incremental compilation for large repos. + +#### D.1 Provider lifecycle and resilience (P1) + +##### Affected files +- `src/index/type-inference-crossfile/tooling.js` +- `src/index/tooling/*.js` +- `src/integrations/tooling/lsp/client.js` +- `src/integrations/tooling/providers/lsp.js` +- `src/integrations/tooling/providers/shared.js` + +##### Findings +- [ ] `createLspClient().request()` can leave pending requests forever if a caller forgets to supply `timeoutMs` (pending map leak). Current provider code *usually* supplies a timeout, but this is not enforced. +- [ ] Diagnostics timing: providers request symbols immediately after `didOpen` and then `didClose` quickly; some servers publish diagnostics asynchronously and may not emit before close, leading to inconsistent diagnostic capture. + +##### Required remediation +- [ ] Enforce a default request timeout in `createLspClient.request()` if none is provided. +- [ ] For diagnostics collection, consider: + - [ ] waiting a bounded time for initial diagnostics after `didOpen`, or + - [ ] explicitly requesting diagnostics if server supports it (varies), or + - [ ] documenting that diagnostics are “best effort” and may be incomplete + +#### D.2 Unicode/offset correctness: add stronger guarantees (P1) + +##### Affected files +- `src/integrations/tooling/lsp/positions.js` +- `src/shared/lines.js` (supporting) +- `tests/type-inference-lsp-enrichment.js` +- `tests/segment-pipeline.js` + fixtures + +##### Findings +- [ ] `positions.js` JSDoc claims “1-based line/column”; column is actually treated as 0-based (correct for LSP), but the doc comment is misleading. +- [ ] Test coverage does not explicitly include surrogate pairs (emoji), which are the common failure mode when mixing code-point vs UTF-16 offsets. + +##### Required remediation +- [ ] Fix the JSDoc to reflect actual behavior (LSP: 0-based character offsets; line converted to 1-based for internal helpers). +- [ ] Add tests with: + - [ ] emoji in identifiers and/or strings before symbol definitions + - [ ] CRLF line endings fixtures (if Windows compatibility is required) + +#### D.3 Generic LSP provider chunk matching is weaker than clangd provider (P2) + +##### Affected file +- `src/integrations/tooling/providers/lsp.js` + +##### Findings +- [ ] `findChunkForOffsets()` requires strict containment (symbol range must be within chunk range). clangd-provider uses overlap scoring, which is more robust. + +##### Required remediation +- [ ] Update generic provider to use overlap scoring like clangd-provider to reduce missed matches. + +#### D.4 TypeScript provider issues (P2/P1 depending on usage) + +##### Affected file +- `src/index/tooling/typescript-provider.js` + +##### Findings +- [ ] `loadTypeScript()` resolve order includes keys that are not implemented (`global`) and duplicates (`cache` vs `tooling`). +- [ ] Parameter name extraction uses `getText()` which can produce non-identifiers for destructuring params (bad keys for `params` map). +- [ ] Naming convention risk: provider writes keys like `Class.method` which may not match chunk naming conventions; if mismatched, types will not attach. + +##### Required remediation +- [ ] Fix the resolution order logic and document each lookup path purpose. +- [ ] Only record parameter names for identifiers; skip or normalize destructuring params. 
+- [ ] Validate chunk naming alignment (structural chunk naming vs provider symbol naming) and add a test for a class method mapping end-to-end. + +#### D.5 Cross-file inference merge determinism and evidence (P2) + +##### Affected files +- `src/index/type-inference-crossfile/apply.js` +- `src/index/type-inference-crossfile/pipeline.js` + +##### Findings +- [ ] `mergeTypeList()` dedupes by `type|source` but drops evidence differences; confidence merging strategy is simplistic. +- [ ] Output ordering is not explicitly sorted after merges. + +##### Required remediation +- [ ] Decide how to treat evidence in merges (keep first, merge arrays, keep highest confidence). +- [ ] Sort merged type lists deterministically (confidence desc, type asc, source asc). + +#### D.6 Signature parsing robustness (P2) + +##### Affected files +- `src/index/tooling/signature-parse/clike.js` +- `src/index/tooling/signature-parse/python.js` +- `src/index/tooling/signature-parse/swift.js` + +##### Findings +- [ ] Parsers are intentionally lightweight, but they will fail on common real-world signatures: + - C++ templates, function pointers, references + - Python `*args/**kwargs`, keyword-only params, nested generics + - Swift closures and attributes + +##### Required remediation +- [ ] Add test fixtures covering at least one “hard” signature per language. +- [ ] Consider using tooling hover text more consistently (already used as fallback in clangd-provider) or integrate a minimal parser that handles nested generics and defaults. + +--- + +### E) Performance improvements to prioritize (cross-cutting) + +#### E.1 Risk analysis hot path (P1) +- [ ] Single-pass line scan for sources/sinks/sanitizers. +- [ ] Early return on caps (maxBytes/maxLines) rather than scanning the whole file anyway. +- [ ] Cheap prefilter before SafeRegex evaluation. +- [ ] Avoid per-line SafeRegex exceptions (see §B.2). + +#### E.2 Markdown segmentation duplication (P2) +- [ ] `segments.js` parses markdown twice (inline code spans + fenced blocks). Consider extracting both from one micromark event stream. + +#### E.3 Tooling providers I/O duplication (P2) +- [ ] Providers re-read file text from disk; if indexing already has the content in memory, pass it through (where feasible) to reduce I/O. + +--- + +### F) Refactoring goals (maintainability / policy centralization) + +- [ ] Consolidate analysis feature toggles into a single `analysisPolicy` object that is passed to: + - metadata v2 builder + - risk analysis + - git analysis + - type inference (local + cross-file + tooling) +- [ ] Centralize schema versioning and validation: + - one metadata v2 schema + - one risk rule bundle schema + - one place that validates both as part of artifact validation + +--- + +### G) Tests: required additions and upgrades + +#### Existing tests reviewed (from the provided list) +- `tests/metadata-v2.js` +- `tests/churn-filter.js` +- `tests/git-blame-range.js` +- `tests/git-hooks.js` +- `tests/git-meta.js` +- `tests/minhash-parity.js` +- `tests/segment-pipeline.js` (+ fixtures) +- `tests/type-inference-crossfile*.js` +- `tests/type-inference-lsp-enrichment.js` +- `tests/type-inference-*-provider-no-*.js` (clangd/sourcekit) + +#### Required test upgrades (P1/P0 where noted) +- [ ] **P0:** Add tests for metadata v2 nested inferred/tooling param types (see §A.1). +- [ ] **P0:** Add tests for risk rule compilation config correctness (flags honored, invalid patterns handled) (see §B.1). +- [ ] **P0:** Add risk analysis “long line” test to ensure no crashes (see §B.2). 
+- [ ] **P1:** Add unicode offset tests that include surrogate pairs (emoji) for:
+  - LSP position mapping
+  - chunk start offsets around unicode
+- [ ] **P1:** Add git caching/policy tests (default blame policy + no repeated subprocess calls where caching is intended).
+
+---
+
+**Deliverables**
+- This remediation checklist (this document)
+- Updated `docs/metadata-schema-v2.md` and `docs/risk-rules.md` that match the implementation
+- Expanded test suite that locks in:
+  - metaV2 types correctness (including nested)
+  - risk rule compilation correctness and non-crashing evaluation
+  - unicode offset correctness (including surrogate pairs)
+  - intended git blame policy and caching
+
+**Exit criteria**
+- All P0 items are fixed and covered by tests.
+- Metadata v2 output matches the schema doc, and `validateIndexArtifacts()` validates it meaningfully.
+- Risk analysis and tooling passes are “best-effort”: they may skip work or return partial results, but they never crash indexing.
+
+
+## Phase 31 — Language handlers & chunking review (Section 5)
+
+**Objective:** Make language detection, per-language chunking, tree-sitter integration, and ingestion tooling *deterministic, robust on real-world code*, and *well-tested* — with clear fallback behavior, predictable chunk boundaries, and guardrails against performance/pathological inputs.
+
+**Scope reference:** Review Section 5 file list + checklist (see the attached “review section 5 files and checklist” markdown).
+
+### Note
+While producing this document I noticed that Markdown code spans make it easy to conflate `'\\t'` (the literal two-character sequence backslash + t) with `'\t'` (a string containing an actual tab). The YAML findings below therefore state in prose which form is meant. The underlying bug and the remediation tasks are unaffected.
+
+---
+
+### 31.0 Priority findings summary (what must be fixed first)
+
+#### P0 — Breaks correctness, tests, or core workflows
+- [ ] **Fix YAML tab handling + Windows path normalization bugs** in `src/index/chunking/formats/yaml.js` (tabs are currently checked against the two-character literal `'\\t'` rather than an actual tab character; Windows paths are normalized with a regex that only matches doubled backslashes).
+  - Affects: skipping list items / indentation detection; GitHub Actions workflow detection on Windows-style paths.
+- [ ] **Fix C-like docstring/attribute extraction off-by-one** in `src/lang/clike.js` (doc comment extraction currently skips the line immediately above declarations).
+  - Affects: docstring/attributes in C/C++/ObjC chunks (and downstream docmeta / fidelity).
+- [ ] **Fix broken test syntax** in `tests/language-registry/collectors.test.js` (invalid escaped quotes).
+  - Affects: test suite execution.
+- [ ] **Fix ingestion tools writing output before ensuring directory exists** in:
+  - `tools/ctags-ingest.js`
+  - `tools/gtags-ingest.js`
+  - `tools/lsif-ingest.js`
+  - `tools/scip-ingest.js`
+  Creating the write stream before `ensureOutputDir()` can fail when the output directory does not exist.
+- [ ] **Fix SQL statement splitting for standard SQL escaping (`''` / `""`)** in `src/lang/sql.js`.
+  Current quote toggling assumes backslash-escaping and will mis-split statements containing doubled quotes (a splitter sketch follows this list).
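+
+For the SQL item above, a minimal sketch of quote-aware statement splitting that honors doubled-quote escaping (dollar-quoting and MySQL `DELIMITER` handling are deliberately omitted; this is illustrative, not the project’s implementation):
+
+```js
+// Sketch: split on ';' only outside quoted regions, treating '' and "" as
+// escapes per standard SQL instead of assuming backslash escaping.
+function splitSqlStatements(sql) {
+  const statements = [];
+  let start = 0;
+  let quote = null; // "'" or '"' while inside a quoted region
+  for (let i = 0; i < sql.length; i++) {
+    const ch = sql[i];
+    if (quote) {
+      if (ch === quote) {
+        if (sql[i + 1] === quote) i++; // doubled quote: escaped, stay inside
+        else quote = null;             // region closed
+      }
+    } else if (ch === "'" || ch === '"') {
+      quote = ch;
+    } else if (ch === ';') {
+      statements.push(sql.slice(start, i + 1));
+      start = i + 1;
+    }
+  }
+  const tail = sql.slice(start).trim();
+  if (tail) statements.push(tail);
+  return statements;
+}
+```
+
+With this scanner, `SELECT 'It''s; fine';` stays a single statement because the doubled quote keeps the scanner inside the string.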
+
+#### P1 — Tree-sitter quality/perf gaps that will surface at scale
+- [ ] **Fix `findNameNode` traversal depth bug** in `src/lang/tree-sitter/chunking.js` (depth increments per node instead of per level; the search stops after ~4 iterations).
+  - Affects: chunk naming quality and method/class qualification.
+- [ ] **Make tree-sitter worker path functional and deterministic** (`src/lang/workers/tree-sitter-worker.js` + `src/lang/tree-sitter/chunking.js`).
+  - Worker currently does not preload/init grammars; `buildTreeSitterChunksAsync()` treats a `null` worker result as “success” and does not fall back.
+
+#### P2 — Cleanup, clarity, and long-term maintainability
+- [ ] **Remove or use unused imports** (e.g., `parseTypeScriptSignature` in `src/lang/typescript/chunks-babel.js`).
+- [ ] **Add missing/edge-case tests** (Windows paths, tabs, unicode identifiers, SQL quoting, tree-sitter worker behavior, etc.).
+- [ ] **Document chunk metadata semantics** (particularly `meta.endLine` inclusivity and byte vs. code-unit offsets) in `docs/contracts/chunking.md` (and/or a new contract doc).
+
+---
+
+### 31.1 Chunking pipeline: mapping, fallback, limits, determinism
+
+#### 31.1.1 Fallback behavior and deterministic output
+- [ ] **Audit & document** the full fallback chain in `src/index/chunking/dispatch.js`:
+  - code chunker → code-format chunker → prose chunker → root chunk (prose extensions) → fixed-size blob fallback.
+- [ ] **Add regression tests** that verify:
+  - A failed code chunker returns `null` and the dispatcher properly falls back.
+  - “Prose mode” behavior for `.md/.rst/.adoc/.txt/.mdx` is stable (chunk headings when possible; otherwise single chunk).
+  - “Code mode” for prose files intentionally uses blob fallback (or adjust if that’s not desired).
+
+#### 31.1.2 Limits: correctness + performance under large inputs
+- [ ] **Add tests for multi-byte UTF-8 boundaries** in `applyChunkingLimits()` (`src/index/chunking/limits.js`):
+  - Ensure splits never create invalid surrogate pairs.
+  - Ensure byte limits are enforced correctly with emoji / non-ASCII identifiers.
+- [ ] **Performance review:** `resolveByteBoundary()` currently calls `Buffer.byteLength(text.slice(0, mid))` repeatedly.
+  - [ ] Consider a faster strategy (e.g., pre-encoding once to a `Buffer`, or maintaining cumulative byte counts per line) to avoid repeated substring allocations.
+- [ ] **Clarify contract semantics** for:
+  - Whether `chunk.end` is exclusive (it is treated as exclusive almost everywhere).
+  - Whether `meta.endLine` is “line containing end offset” vs “last included line”.
+  (Many language chunkers use `offsetToLine(end)` vs `offsetToLine(end - 1)`; this should be intentional and documented.)
+  - Update `docs/contracts/chunking.md` accordingly and add examples.
+
+---
+
+### 31.2 Format chunkers: YAML, JSON, XML, INI/TOML, Markdown, RST/Asciidoc
+
+#### 31.2.1 YAML (`src/index/chunking/formats/yaml.js`)
+**Bugs**
+- [ ] **Fix tab detection** in `chunkYamlTopLevel()` and list-item skipping:
+  - Current code checks `line.startsWith('\\t')` (the two-character sequence backslash + t) instead of `line.startsWith('\t')` with an actual tab character.
+  - Locations:
+    - line ~60: `line.startsWith('\\t')` in the list-item skip condition
+    - line ~92: `line.startsWith('\\t')` in the indentation calculation
+- [ ] **Fix Windows path normalization** in `chunkYaml()`:
+  - Current: `normalizedPath = relPath.replace(/\\\\/g, '/')`
+    This matches *double* backslashes; typical Windows paths contain single backslashes.
+  - Should be: `relPath.replace(/\\/g, '/')` (a single-backslash regex; both YAML fixes are sketched at the end of §31.3 below)
+
+**Hardening / improvements**
+- [ ] **Add YAML tests** covering:
+  - Tab-indented YAML (even if discouraged, tools may produce it).
+  - Workflow path detection for both `".github/workflows/foo.yml"` and `".github\\workflows\\foo.yml"`.
+  - A workflow file with `jobs:` where indentation is not 2 spaces (ensure graceful behavior).
+- [ ] **Document YAML chunker limitations** (top-level-only + heuristics for GH Actions) in the chunking contract or a dedicated “format chunkers” doc section.
+
+#### 31.2.2 JSON (`src/index/chunking/formats/json.js`)
+- [ ] **Test hygiene:** Fix test calls that pass arguments in the wrong positions (e.g., `chunkJson(jsonText, {})` in `tests/chunking/json.test.js` currently passes `{}` as `relPath`).
+  Update to `chunkJson(jsonText, null, {})` for clarity and future-proofing.
+- [ ] **Optional robustness improvement:** consider using `jsonc-parser` for tolerant parsing (trailing commas/comments) *if desired*.
+  - If adopted, ensure invalid JSON still cleanly falls back (i.e., return `null`).
+
+#### 31.2.3 XML (`src/index/chunking/formats/xml.js`)
+- [ ] Add tests for:
+  - Nested tags with attributes + self-closing tags.
+  - CDATA blocks and processing instructions.
+  - Malformed tag recovery (should return `null`, triggering fallback, rather than producing broken chunks).
+
+#### 31.2.4 Markdown (`src/index/chunking/formats/markdown.js`)
+- [ ] Add tests for:
+  - Headings inside fenced blocks (should not create chunks; current `inFence` logic covers ``` and ~~~).
+  - Setext headings vs horizontal rules (ensure `---` under a paragraph is treated correctly).
+
+#### 31.2.5 RST/Asciidoc (`src/index/chunking/formats/rst-asciidoc.js`)
+- [ ] Add tests for:
+  - RST overline+underline headings and nested sectioning.
+  - Asciidoc `==` headings inside code/list blocks to avoid false positives.
+
+#### 31.2.6 INI/TOML (`src/index/chunking/formats/ini-toml.js`)
+- [ ] Add tests for:
+  - TOML array-of-tables (`[[table]]`).
+  - INI sections with unusual whitespace and comments.
+
+---
+
+### 31.3 Language registry: selection, options, and collector mapping
+
+#### 31.3.1 Registry correctness (`src/index/language-registry/registry.js`)
+- [ ] **Confirm and document intentional grouping** of C/C++/ObjC into `id: 'clike'`:
+  - Ensure docs and tests consistently reflect that `.c/.h/.cpp/.hpp/.m/.mm` map to the same language id.
+  - Update language-fidelity expectations and/or docs if users expect separate ids.
+
+- [ ] Expand `tests/language-registry/selection.test.js` to cover:
+  - C/C++/ObjC extensions: `.c`, `.h`, `.cpp`, `.hpp`, `.m`, `.mm`
+  - Ambiguous extensions and “special names”:
+    - `Dockerfile`, `dockerfile`, `*.Dockerfile`
+    - `Makefile`, `makefile`
+    - `CMakeLists.txt`
+    - `.gitignore`-style config names (if supported elsewhere)
+
+#### 31.3.2 Import collectors map (`tests/language-registry/collectors.test.js`)
+- [ ] **Fix syntax error** at the Dart fixture entry:
+  - Replace the invalid escaped-quote literal `text: \"import 'package:foo/bar.dart';\",` with a valid JS string literal:
+  - `text: "import 'package:foo/bar.dart';",`
+
+- [ ] Add edge-case import collector tests for:
+  - Multiline imports (where applicable).
+  - Imports inside comments (should be ignored where the collector claims to ignore comments).
+  - Duplicate imports / whitespace variants (ensure normalization works).
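+
+Recapping the two §31.2.1 YAML fixes in unambiguous code form (the variable setup is illustrative):
+
+```js
+const line = '\tname: ci';                    // a tab-indented YAML line
+const relPath = '.github\\workflows\\ci.yml'; // a Windows-style relative path
+
+// Buggy: '\\t' is the two-character sequence backslash + t, never a real tab.
+const skippedBug = line.startsWith('\\t');    // false, even for tab indents
+// Fixed: match an actual tab character.
+const skipped = line.startsWith('\t');        // true
+
+// Buggy: /\\\\/ matches two literal backslashes; Windows paths have one.
+const normalizedBug = relPath.replace(/\\\\/g, '/'); // leaves the path unchanged
+// Fixed: a single-backslash regex normalizes the separators.
+const normalized = relPath.replace(/\\/g, '/');      // '.github/workflows/ci.yml'
+```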
+
+---
+
+### 31.4 Tree-sitter backbone: wasm init, language loading, chunk extraction, workers
+
+#### 31.4.1 Name extraction (`src/lang/tree-sitter/chunking.js`)
+- [ ] **Fix `findNameNode()` depth logic**:
+  - Current implementation increments `depth` per dequeued node, not per BFS level.
+  - Result: the search stops after ~4 processed nodes and often fails to find a name.
+  - Expected: traverse up to N levels or up to a node-count budget (explicitly), and return the first plausible identifier (see the sketch after §31.5.1 below).
+
+- [ ] Add tests that assert:
+  - Function and class chunk names are extracted correctly across multiple language grammars.
+  - Member/method names are found for nested AST shapes where the `name` field is not a direct child.
+
+#### 31.4.2 Worker-mode tree-sitter chunking (`src/lang/workers/tree-sitter-worker.js`, `src/lang/tree-sitter/chunking.js`)
+- [ ] **Initialize and preload grammars inside the worker** (or add a per-worker lazy-init path):
+  - Today, the worker calls `buildTreeSitterChunks()` without ensuring the tree-sitter wasm + language grammar are loaded in that worker thread.
+  - Proposed fix:
+    - In the worker, resolve the language id from `ext`/`languageId`, then `await preloadTreeSitterLanguages([resolvedId], treeSitterOptions)` before parsing.
+- [ ] **Make `buildTreeSitterChunksAsync()` treat `null` results as a failure signal** and fall back to in-thread parsing (or to non-tree-sitter chunking), at least when worker-mode is enabled.
+- [ ] Add tests that explicitly enable worker-mode and assert that:
+  - Chunks are returned (not `null`) for a known fixture.
+  - The result matches non-worker behavior (same chunk boundaries, or documented acceptable differences).
+  - If a grammar is missing/unavailable, it falls back cleanly and deterministically.
+
+#### 31.4.3 Configuration normalization (`src/lang/tree-sitter/options.js`)
+- [ ] Improve boolean normalization:
+  - Current `normalizeEnabled()` only recognizes `false` and the literal string `'off'`.
+  - Expand to treat `'false'`, `'0'`, `'no'` (case-insensitive) as disabled, and `'true'`, `'1'`, `'yes'`, `'on'` as enabled.
+- [ ] Add tests for config parsing from environment/JSON where booleans may be strings.
+
+#### 31.4.4 Offsets: bytes vs JS string indices
+- [ ] Add an explicit contract note and tests around the offset units used by:
+  - tree-sitter (`node.startIndex/endIndex`)
+  - parse5 and other JS parsers
+  - Python AST (line/col from the Python runtime)
+  Ensure all chunk `start/end` offsets are consistent with JS string slicing expectations, particularly with non-BMP unicode characters.
+
+---
+
+### 31.5 Language handlers: correctness fixes & hardening
+
+#### 31.5.1 C-like (`src/lang/clike.js`)
+- [ ] **Fix docstring extraction index** for functions and ObjC methods:
+  - Current:
+    - ObjC method chunk meta: `extractDocComment(lines, i - 1, ...)` and `collectAttributes(lines, i - 1, ...)`
+    - C-like functions: `extractDocComment(lines, i - 1)`
+    - This skips the immediately preceding line.
+  - Fix: pass `i` (the 0-based declaration start line) instead of `i - 1`.
+  - Locations:
+    - ~417–418, ~463 in `src/lang/clike.js`
+
+- [ ] Add tests for C-like doc comment capture:
+  - A `/** ... */` or `// ...` directly above a `struct`, `class`, `enum`, and `function`.
+  - An ObjC method with a `///` doc comment above it.
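+
+Returning to §31.4.1, a minimal sketch of level-bounded BFS for name extraction (the node shape and the identifier heuristic are assumptions, not the project’s actual helpers):
+
+```js
+// Sketch: bound the search by BFS *levels*, not by a count of dequeued nodes,
+// so wide ASTs are not cut off after the first few siblings.
+function findNameNode(root, maxDepth = 4) {
+  let level = root.children ?? [];
+  for (let depth = 0; depth < maxDepth && level.length > 0; depth++) {
+    const next = [];
+    for (const node of level) {
+      if (node.type === 'identifier' || node.type === 'name') return node;
+      next.push(...(node.children ?? []));
+    }
+    level = next; // descend exactly one level per iteration
+  }
+  return null;
+}
+```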
+ +#### 31.5.2 SQL (`src/lang/sql.js`) +- [ ] **Fix quote handling** in both `stripSqlComments()` and `splitSqlStatements()`: + - SQL escaping commonly uses doubled quotes: + - `'It''s fine'` + - `"a ""quoted"" identifier"` + - Current logic toggles on every `'`/`"` not preceded by backslash, which breaks on doubled quotes. + +- [ ] Add tests that include: + - Semicolons inside strings with doubled quotes. + - PostgreSQL dollar-quoted strings combined with single-quoted strings. + - MySQL delimiter blocks that contain semicolons. + +#### 31.5.3 CSS (`src/lang/css.js`) +- [ ] Add guardrails to prevent pathological chunk explosion when using the CSS tree-sitter parser: + - Options: + - Enforce a max node/chunk count (consistent with tree-sitter default maxChunkNodes behavior). + - Or switch to `buildTreeSitterChunks()` and its existing limits. +- [ ] Add tests for: + - Nested `@media` with many rules (ensure performance and deterministic chunk output). + - Files exceeding the max node threshold (ensure fallback to heuristic). + +#### 31.5.4 TypeScript (`src/lang/typescript/chunks-babel.js`) +- [ ] Remove or use unused import `parseTypeScriptSignature` (currently imported but not referenced). +- [ ] Add/extend tests ensuring: + - Babel-based TS chunker produces signatures and types consistently where expected. + - Worker/non-worker tree-sitter paths do not regress TS chunking (when enabled). + +--- + +### 31.6 Imports, relations, and control-flow metrics + +#### 31.6.1 Import collectors +- [ ] Add test coverage for: + - Normalization rules (`normalizeImportToken()` behavior). + - Edge cases per language (e.g., JS `import type`, TS `import("x")`, Python relative imports). +- [ ] Validate that collectors return stable, sorted output (dedupe + order determinism), or document if order is intentionally non-deterministic. + +#### 31.6.2 Relations builders (`src/index/language-registry/simple-relations.js`, per-language `relations.js`) +- [ ] Add a small integration test that: + - Runs `collectLanguageImports()` and `buildLanguageRelations()` for a multi-language fixture set. + - Verifies the resulting `imports`, `exports`, `calls`, and `usages` sets match expectations. + +--- + +### 31.7 Ingestion tools: ctags / gtags / lsif / scip + +#### 31.7.1 Output directory creation order +- [ ] Move `await ensureOutputDir()` to occur *before* `fs.createWriteStream(outputPath, ...)` in: + - `tools/ctags-ingest.js` (write stream is created before the dir is ensured) + - `tools/gtags-ingest.js` + - `tools/lsif-ingest.js` + - `tools/scip-ingest.js` + +#### 31.7.2 Robustness improvements +- [ ] Add tests / smoke scripts that verify: + - Tools succeed when output directory doesn’t exist. + - Tools correctly handle empty input streams. + - Tools fail with actionable errors on malformed JSON lines. + +- [ ] Add optional flags/docs for: + - Strict vs tolerant ingest behavior (skip malformed lines vs fail-fast). + - Path normalization expectations (repo-root relative vs absolute). + +--- + +### 31.8 Docs and test suite alignment + +#### 31.8.1 Fix broken / missing documentation references +- [ ] The Section 5 checklist references docs that are *not present* in this repo snapshot (e.g., `docs/contracts/language-registry.md`, `docs/contracts/ast.md`, and `docs/optional/*`). + Decide whether to: + - Create these docs, or + - Update the checklist to point to existing docs (`docs/language-handler-imports.md`, `docs/language-fidelity.md`, etc.). 
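+
+For §31.7.1 the fix is purely an ordering change; a sketch using plain `fs` (the real tools use `ensureOutputDir()`; the path here is illustrative):
+
+```js
+import fs from 'node:fs';
+import path from 'node:path';
+
+// Sketch: create the output directory *before* opening the write stream,
+// so ingest succeeds on a clean checkout.
+const outputPath = path.join('index-code', 'ctags.ndjson');
+fs.mkdirSync(path.dirname(outputPath), { recursive: true });
+const out = fs.createWriteStream(outputPath, { encoding: 'utf8' });
+```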
+
+#### 31.8.2 Update existing docs for discovered behavior
+- [ ] Update `docs/contracts/chunking.md` to include:
+  - Chunk offset semantics (exclusive `end`, unicode considerations).
+  - `meta.startLine/endLine` semantics and examples.
+  - Expected behavior for overlapping chunks (if allowed) vs non-overlapping (if required).
+- [ ] Update `docs/language-fidelity.md` if docstring expectations for C-like currently fail due to the off-by-one bug.
+
+#### 31.8.3 Add a “known limitations” section (recommended)
+- [ ] Document known heuristic limitations for:
+  - SQL parsing (heuristic statement splitting vs full parser).
+  - YAML parsing (line-based, top-level heuristics).
+  - Language relations (regex-based calls/usages for some languages).
+
+---
+
+### Deliverables
+- [ ] All P0/P1 fixes implemented with unit tests.
+- [ ] Updated docs reflecting chunk semantics and configuration.
+- [ ] A focused regression test pack covering:
+  - YAML tabs + Windows workflow paths
+  - C-like doc comments
+  - SQL doubled-quote handling
+  - Tree-sitter worker-mode functionality
+  - Chunking limits with unicode/multi-byte text
+
+---
+
+### Exit criteria
+- [ ] `npm test` (or the project’s test runner) executes without syntax errors (including `collectors.test.js`).
+- [ ] Format chunkers are robust against malformed inputs and fall back deterministically.
+- [ ] Tree-sitter worker-mode returns real chunks for supported languages and falls back when grammars are missing.
+- [ ] Chunk metadata semantics are documented and consistent across chunkers (or differences are explicitly justified).
+- [ ] Ingestion tools succeed when output directories are missing and produce valid NDJSON outputs.
+
+
+## Phase 32 — (Review) — Retrieval, Services & Benchmarking/Eval (Latency End-to-End)
+
+### Objective
+
+Validate and improve the **retrieval pipeline**, **services surfaces (API + MCP)**, and **benchmark/eval tooling** so that:
+
+* Search semantics are correct and contract-aligned (query parsing, filters, ranking, explain output, context expansion).
+* Backends behave consistently (memory / sqlite / sqlite-fts / lmdb) and performance paths are not accidentally disabled.
+* Services are robust (streaming behavior, cancellation, backpressure, security posture).
+* Benchmarks and eval harnesses are actionable, reproducible, and can enforce latency/quality budgets.
+
+### Scope
+
+Reviewed the complete Section 8 list from the attached markdown checklist document, including:
+
+* Retrieval CLI + pipeline + filters + output formatting
+* SQLite/LMDB helpers and cache layers
+* Core integrations used by tools/services
+* API server (router + SSE) and MCP transport/tools
+* Benchmark harnesses (micro + language) and query tooling
+* Eval harness
+* Related docs + tests + fixtures
+
+(Where files referenced other modules not in the Section 8 list, I noted mismatches and dependency risks, but the primary focus remains the Section 8 scope.)
+
+---
+
+### Exit Criteria (What “Done” Looks Like)
+
+#### Correctness & Contracts
+
+* [ ] Query parsing supports required constructs (operators/quoting/negation/precedence) or docs/contracts explicitly define the simplified grammar.
+* [ ] Filters are correctly detected as “active” and do not disable backend fast-paths accidentally.
+* [ ] Explain output matches actual scoring math and is emitted only when requested (or contracts updated to reflect always-present fields).
+ +#### Performance & Latency + +* [ ] SQLite FTS fast-path is not disabled by default (especially for large indexes). +* [ ] Context expansion avoids repeated O(N) scans per query (or is cached/optimized). +* [ ] Benchmarks can write baselines reliably and optionally enforce budgets. + +#### Services Robustness + +* [ ] API streaming handles backpressure and connection close without hanging. +* [ ] API/MCP support cancellation/timeout propagation to stop expensive work. +* [ ] CORS/security posture is explicitly intentional and documented. + +#### Tests & Tooling + +* [ ] Tests cover discovered regressions and add missing edge cases (FTS eligibility, extracted-prose query caching, MCP id=0, etc.). +* [ ] Bench/eval docs match actual behavior and command usage. + +--- + +## Findings & Required Work + +### 8.A — Retrieval Semantics, Explain, Context Expansion + +#### A1 — **Critical: Filter “active” detection is wrong (breaks performance paths)** + +**Files:** + +* `src/retrieval/filters.js` +* `src/retrieval/cli.js` +* `src/retrieval/pipeline.js` +* `src/retrieval/sqlite-helpers.js` (indirect impact via CLI choices) + +**What I found:** +`hasActiveFilters()` treats *any non-empty object* as “active,” which causes `filtersActive` to be true even when no user filters are set, because the CLI always includes internal objects like `filePrefilter`. + +**Impact:** + +* Forces filter pass on every query. +* Can disable SQLite FTS eligibility for large indexes because allowed-id pushdown cannot be used when the “allowed set” becomes huge. +* Prevents “lazy chunk loading” decisions that should apply when there are no real filters. +* Creates major, silent performance regressions at scale. + +**Action items:** + +* [ ] Fix `hasActiveFilters()` to ignore internal/config-only keys (e.g., `filePrefilter`) and only count user-constraining filters. +* [ ] Add unit tests for `hasActiveFilters()` default filter object and typical combinations. +* [ ] Add an integration test ensuring sqlite-fts remains eligible on a large index when no filters are set (or at least verify the path selection in stats/debug output). + +--- + +#### A2 — **Context expansion does repeated O(N) indexing work per query** + +**Files:** + +* `src/retrieval/context-expansion.js` +* `src/retrieval/cli.js` (enables context expansion) +* `src/retrieval/pipeline.js` + +**What I found:** +`buildContextIndex()` rebuilds `byName` and `byFile` maps every query. + +**Impact:** + +* For large repos, this adds noticeable latency per query. +* Violates checklist intent: “avoids repeated file reads / expensive rebuilds.” + +**Action items:** + +* [ ] Cache context index per loaded index signature (store on the loaded index object or in `index-cache.js`). +* [ ] Add tests to ensure expansions are stable and do not cross branch/filters (if applicable). +* [ ] Document the intended semantic boundaries of context expansion (same file vs cross-file, name matching rules, etc.). + +--- + +#### A3 — Explain output / scoring contract alignment is ambiguous + +**Files:** + +* `src/retrieval/pipeline.js` +* `src/retrieval/output/explain.js` +* `src/retrieval/cli/render-output.js` +* Docs: `docs/contracts/retrieval-ranking.md` (very high-level) + +**What I found:** +The pipeline always builds `scoreBreakdown` objects, even if explain is not requested; compact JSON hides it, but full JSON may expose it unintentionally. + +**Action items:** + +* [ ] Decide contract behavior: + + * Option 1: Only compute/attach `scoreBreakdown` when explain requested. 
+ * Option 2: Always include but document it (and remove `--explain` implication of optionality). +* [ ] Add snapshot tests asserting the presence/absence of explain fields by mode/output format. +* [ ] Ensure explain’s boost attribution matches scoring math (phrase + symbol boosts currently depend on the already-boosted score; document or adjust). + +--- + +### 8.B — Query Parsing & Filtering + +#### B1 — Query parsing does not satisfy checklist requirements + +**Files:** + +* `src/retrieval/query.js` +* `src/retrieval/query-parse.js` +* Tests/docs indirectly + +**What I found:** +Parsing supports: + +* quoted phrases (`"..."`) +* negation via `-token` and `-"phrase"` + +It does **not** support: + +* boolean operators (AND/OR/NOT) semantics +* precedence / parentheses +* actionable errors for malformed queries (unbalanced quotes become literal tokens) + +**Action items:** + +* [ ] Either implement full operator parsing & precedence or explicitly constrain and document the query grammar. +* [ ] Add detection + actionable error messages for unbalanced quotes and invalid constructs. +* [ ] Add tests for negated phrases, nested quotes, malformed input, and operator tokens. + +--- + +#### B2 — Filtering: performance and correctness concerns + +**Files:** + +* `src/retrieval/output/filters.js` +* `src/retrieval/filter-index.js` + +**Key improvements:** + +* [ ] Ensure case-sensitive file filters don’t lose correctness through normalization shortcuts (currently used for prefiltering; confirm final checks are strict). +* [ ] Consider memory growth of filter index structures; document expected footprint and add soft limits/metrics. + +--- + +### 8.C — Ranking Determinism & Tie-Breaking + +#### C1 — Dense ranking should defensively validate embedding dimensionality + +**Files:** + +* `src/retrieval/rankers.js` +* `src/retrieval/embedding.js` +* `src/retrieval/sqlite-helpers.js` + +**What I found:** +`rankDenseVectors()` assumes query embedding length matches index vector dimension. If not, dot-products can become NaN and ranking becomes unstable. + +**Action items:** + +* [ ] Validate query embedding length vs index dims; if mismatch, either truncate safely or skip dense scoring with a clear warning. +* [ ] Add tests for dims mismatch (stub embeddings + configured dims is a good harness). + +--- + +#### C2 — SQLite dense vector scale fallback looks unsafe + +**Files:** + +* `src/retrieval/sqlite-helpers.js` +* Related: `src/storage/sqlite/vector.js` (quantization uses 2/255) + +**What I found:** +If `dense_meta.scale` is missing for any reason, sqlite helper defaults scale to **1.0**, which would break score normalization badly for uint8 quantized vectors. + +**Action items:** + +* [ ] Change fallback scale default to `2/255` (and minVal to `-1` consistent with vector quantization). +* [ ] Add a regression test ensuring dense scoring remains bounded even when meta is missing/corrupt (or fail loudly). + +--- + +### 8.D — Services: API Server & MCP + +#### D1 — SSE backpressure “drain wait” can hang indefinitely on closed connections + +**Files:** + +* `tools/api/sse.js` + +**What I found:** +If `res.write()` returns false, the code awaits `'drain'` only. If the client disconnects before drain fires, that promise may never resolve. + +**Action items:** + +* [ ] Replace `await once('drain')` with `Promise.race([drain, close, error])`. +* [ ] Add tests simulating backpressure + early disconnect (larger payload / forced write buffering). 
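+
+A minimal sketch of the §D1 fix (event names per Node’s `http.ServerResponse`; `writeEvent` is an illustrative helper, not the module’s actual export):
+
+```js
+// Sketch: never await 'drain' alone; resolve on close/error too, so a
+// disconnected client cannot strand the handler forever.
+function waitForDrainOrClose(res) {
+  return new Promise((resolve) => {
+    const settle = () => {
+      res.off('drain', settle);
+      res.off('close', settle);
+      res.off('error', settle);
+      resolve();
+    };
+    res.once('drain', settle);
+    res.once('close', settle);
+    res.once('error', settle);
+  });
+}
+
+async function writeEvent(res, payload) {
+  if (res.destroyed) return false;
+  if (!res.write(`data: ${JSON.stringify(payload)}\n\n`)) {
+    await waitForDrainOrClose(res); // backpressure: wait, but never hang
+  }
+  return !res.destroyed;
+}
+```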
+ +--- + +#### D2 — Streaming contracts/docs do not match actual /search/stream behavior + +**Files:** + +* `tools/api/router.js` +* Docs: `docs/api-server.md`, `docs/contracts/api-mcp.md` + +**What I found:** +`/search/stream` only emits: + +* `start` +* `result` OR `error` +* `done` + +Docs/contracts claim progress streaming and/or richer semantics. + +**Action items:** + +* [ ] Decide: implement progress events (pipeline milestones) OR revise docs/contracts to match current behavior. +* [ ] If implementing progress: add hooks from retrieval CLI/pipeline → core API → router SSE. + +--- + +#### D3 — Cancellation/timeout propagation is missing end-to-end + +**Files:** + +* `tools/api/router.js` +* `tools/mcp/transport.js` +* `tools/mcp/tools.js` +* `src/integrations/core/index.js` +* `src/retrieval/cli.js` (currently no signal handling) + +**What I found:** +Timeouts exist in MCP wrapper, but they do not abort underlying work. API does not abort search on client disconnect. Retrieval does not consume `AbortSignal`. + +**Action items:** + +* [ ] Introduce `AbortController` per request/tool call. +* [ ] Wire close events (`req.on('close')`) and timeout timers to `abort()`. +* [ ] Teach retrieval pipeline / embedding fetch to check `signal.aborted` and throw a consistent cancellation error. +* [ ] Add tests: + + * API stream abort stops work early (not just stops writing). + * MCP tool timeout aborts the underlying work, not just returns an error. + +--- + +#### D4 — Security posture: permissive CORS is risky + +**Files:** + +* `tools/api/router.js` +* Docs: `docs/api-server.md` + +**What I found:** +CORS is `*` by default. Even though server defaults to localhost, permissive CORS enables untrusted sites to read responses from a local service in a browser context. + +**Action items:** + +* [ ] Default CORS to disabled or restricted (require explicit `--cors` enablement). +* [ ] Document threat model: local-only, trusted environment, or add token-based auth. +* [ ] Add tests for CORS behavior (preflight, allowed origins). + +--- + +### 8.E — Benchmarks & Latency Budgets + +#### E1 — Microbench “dense” vs “hybrid” distinction is not actually implemented + +**Files:** + +* `tools/bench/micro/run.js` +* `tools/bench/micro/search.js` +* `tools/bench/micro/tinybench.js` +* Docs: `docs/benchmarks.md` + +**What I found:** +Bench tasks labeled “dense” and “hybrid” do not reliably enforce different scoring regimes. Some of the logic implies profiles/env-driven behavior that isn’t applied. + +**Action items:** + +* [ ] Implement explicit scoring strategy selection (via args/env/profile) for sparse vs dense vs hybrid. +* [ ] Confirm the benchmark measures what it claims (esp. hybrid weighting). +* [ ] Add “sanity asserts” in benchmark output to record which strategy actually ran. + +--- + +#### E2 — Baseline writing can fail because directories don’t exist + +**Files:** + +* `tools/bench/micro/tinybench.js` +* Docs: `docs/benchmarks.md` + +**What I found:** +`--write-baseline` writes to `benchmarks/baselines/...` but does not create the directory first. + +**Action items:** + +* [ ] Ensure baseline directory exists via `fs.mkdirSync(..., { recursive:true })`. +* [ ] Add a test for `--write-baseline` success on a clean repo checkout. +* [ ] Update docs to clarify how baselines are created and stored. 
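+
+The §E2 fix, sketched (the baseline path and payload are illustrative):
+
+```js
+import fs from 'node:fs';
+import path from 'node:path';
+
+// Sketch: create the baseline directory before writing, so --write-baseline
+// succeeds on a clean checkout.
+const baselinePath = path.join('benchmarks', 'baselines', 'micro.json');
+fs.mkdirSync(path.dirname(baselinePath), { recursive: true });
+fs.writeFileSync(baselinePath, JSON.stringify({ results: [] }, null, 2));
+```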
+ +--- + +#### E3 — SQLite cache reuse is missing in benchmark harnesses + +**Files:** + +* `tools/bench/micro/run.js` +* `tools/bench/micro/tinybench.js` + +**What I found:** +Bench harnesses often pass `sqliteCache = null`, which may force repeated DB opens and distort warm-run measurements. + +**Action items:** + +* [ ] Instantiate and reuse `createSqliteDbCache()` across runs for warm scenarios. +* [ ] Record cache reuse status in benchmark output for transparency. + +--- + +#### E4 — Latency “budgets” are described but not enforceable + +**Files:** + +* `docs/benchmarks.md` +* Tests: existing bench tests do not enforce budgets + +**Action items:** + +* [ ] Define target budgets (p50/p95) for representative queries and backends. +* [ ] Add CI-friendly “perf smoke” tests that fail if budgets regress beyond thresholds (with generous margins and stable fixtures). +* [ ] Document environment assumptions for benchmarks (CPU, disk, warmup, etc.). + +--- + +### 8.F — Eval Harness + +#### F1 — Matching logic is permissive and may inflate scores + +**Files:** + +* `tools/eval/run.js` +* Docs: `docs/eval.md` + +**What I found:** +Expected match uses `hit.name.includes(expected.name)`; that may treat `foo` as matching `foobar`. + +**Action items:** + +* [ ] Decide strictness: exact name match vs substring vs regex. +* [ ] Add dataset option `matchMode` or per-expected matcher configuration. +* [ ] Add tests for false-positive matching cases. + +--- + +## Additional Concrete Bugs Found (Non-Checklist) + +### G1 — Retrieval output summary “word count” logic uses character length + +**Files:** + +* `src/retrieval/output/format.js` + +**What I found:** +The summary logic compares `.length` of the string (characters) to a “maxWords” variable and uses it to adjust `maxWords`. This is unit-inconsistent and likely incorrect behavior. + +**Action items:** + +* [ ] Fix to track word count, not character length. +* [ ] Avoid calling `getBodySummary()` twice. +* [ ] Add tests for summary length behavior. + +--- + +### G2 — Parity test references missing benchmark query file path + +**Files:** + +* `tests/parity.js` +* Existing file: `tests/parity-queries.txt` + +**What I found:** +`tests/parity.js` reads from `benchmarks/queries/parity-queries.txt`, but the queries file exists under `tests/parity-queries.txt`. + +**Action items:** + +* [ ] Update parity test to load from `tests/parity-queries.txt` (or move file to benchmarks). +* [ ] Add a guard assertion that query file exists with a clear message. + +--- + +### G3 — Language benchmark progress renderer imports wrong relative paths + +**Files:** + +* `tools/bench/language/progress/render.js` + +**What I found:** +Imports reference `../../../src/shared/...` but need one more `../` to reach repo root. As written, this resolves to `tools/src/shared/...` which doesn’t exist. + +**Action items:** + +* [ ] Fix import paths to `../../../../src/shared/...`. +* [ ] Add a smoke test that loads the module (ensures no runtime import failures). + +--- + +### G4 — MCP transport drops valid JSON-RPC ids when id = 0 + +**Files:** + +* `tools/mcp/transport.js` + +**What I found:** +`if (!id) return;` treats `0` as falsy and drops responses/notifications. JSON-RPC allows `id: 0`. + +**Action items:** + +* [ ] Change checks to `(id === null || id === undefined)`. +* [ ] Add MCP tests sending `id: 0`. 
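+
+The §G4 guard, sketched:
+
+```js
+// Sketch: JSON-RPC 2.0 permits id: 0, so check for null/undefined explicitly
+// instead of relying on truthiness.
+function hasId(message) {
+  return message.id !== null && message.id !== undefined;
+}
+
+// Before: if (!id) return;          // silently drops responses with id 0
+// After:  if (!hasId(message)) return;
+```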
+
+---
+
+### G5 — Bench query generator emits invalid CLI fragments (and lacks quoting)
+
+**Files:**
+
+* `tools/bench-query-generator.js`
+
+**What I found:**
+At least one strategy emits `--signature` without a value. Additionally, values with spaces (authors, types) are not quoted, which will break shell parsing.
+
+**Action items:**
+
+* [ ] Fix the signature strategy to emit the flag with a value (e.g., `--signature "<value>"`), never a bare `--signature`.
+* [ ] Quote/escape all flag values safely.
+* [ ] Clarify the intended consumer (CLI vs internal harness) and ensure the output format matches it.
+
+---
+
+## Test Coverage Additions (Highly Recommended)
+
+### New/Expanded Tests
+
+* [ ] `hasActiveFilters()` default object returns false; internal config-only objects don’t activate filters.
+* [ ] sqlite-fts eligibility remains enabled for unfiltered queries on large (>900 chunks) indexes.
+* [ ] Query cache includes extracted-prose payloads and validates required fields when that mode is enabled.
+* [ ] SSE backpressure + client disconnect doesn’t hang.
+* [ ] API abort cancels search work (requires AbortSignal support).
+* [ ] MCP id=0 support.
+* [ ] `--write-baseline` creates directories and succeeds.
+
+---
+
+## Documentation Corrections Required
+
+* [ ] `docs/api-server.md`: align stream behavior (progress vs start/result/done), update security/CORS discussion.
+* [ ] `docs/contracts/api-mcp.md`: align `/search/stream` contract to actual behavior or update implementation.
+* [ ] `docs/benchmarks.md`: document baseline creation and ensure code supports it (mkdir); clarify dense/hybrid distinctions.
+* [ ] `docs/mcp-server.md`: appears outdated vs the actual transport implementation; update to match current code.
+
+## Phase 33 — Review Section 7 — Storage backends (SQLite + LMDB)
+
+**Objective:** Perform an audit of the storage backends (SQLite + LMDB) and their supporting tooling (build, validation, compaction, incremental updates, ANN extension management, and backend selection). Identify *all* correctness bugs, edge cases, documentation drift, missing tests, and performance/refactoring opportunities, aligned to the provided checklist.
+
+#### Out-of-scope (not deeply reviewed, but referenced when necessary)
+
+- Non-listed call-sites (e.g. retrieval query code) were spot-checked only when needed to validate schema/index/query alignment.
+
+---
+
+### Executive summary
+
+#### Top P0 / correctness items
+
+- [ ] **(P0) SQLite ANN table is not updated when it already exists** in:
+  - `src/storage/sqlite/build/from-bundles.js` (vector table existence sets `vectorAnnReady = true` but **does not** prepare `insertVectorAnn`) — see around L120.
+  - `src/storage/sqlite/build/incremental-update.js` (same pattern) — see around L240.
+
+  **Impact:** when the ANN virtual table already exists (most importantly during incremental updates), deleted rows *can* be removed (because deletes run via `deleteDocIds(...)`), but replacement vectors for changed chunks are **not reinserted**, leaving the ANN table sparse/out-of-sync with `dense_vectors`. This can silently degrade or break ANN-based retrieval depending on how the extension is queried.
+
+- [ ] **(P0) Retrieval-side fail-closed is incomplete for SQLite schema versions.**
+
+  `src/retrieval/cli-sqlite.js` validates required table *names* but does **not** enforce `PRAGMA user_version == SCHEMA_VERSION` (or otherwise fail closed on schema mismatch). This violates the checklist requirement (“readers fail closed on unknown versions”) for the SQLite reader path.
+ +- [ ] **(P0) Bundle-build path does not hard-fail on embedding dimension mismatches** (`src/storage/sqlite/build/from-bundles.js`). + + The code currently *warns once* on a dims mismatch but continues (and may still insert inconsistent vectors). This risks producing an index with an internally inconsistent dense-vector corpus (which can cause downstream errors or silent relevance regressions). + +#### High-signal P1 / robustness items + +- [ ] **WAL / sidecar handling is inconsistent across build vs incremental update paths.** + Full rebuild paths use `replaceSqliteDatabase(...)` which removes sidecars, but incremental updates modify the DB in-place under WAL mode and do not explicitly checkpoint/truncate. If later tooling removes sidecars without a checkpoint, this can create “single-file DB” assumptions that do not hold. + +- [ ] **Indexing for hot maintenance queries can be improved**: `chunks(mode, file)` exists, but multiple maintenance queries order by `id` and would benefit from `(mode, file, id)`. + +- [ ] **Docs drift:** `docs/sqlite-incremental-updates.md` (and a few related docs) describe doc-id behavior and operational details that do not match current implementation (doc-id reuse/free-list behavior; ratio guard details; and operational caveats). + +#### “Good news” / items that look solid already + +- Most bulk write paths are transactional (build ingest, compaction copy, incremental applyChanges). +- The extension download hardening in `tools/download-extensions.js` has multiple safety layers (hash verification support, archive path traversal protection, size/entry limits). +- LMDB corruption handling has targeted tests (`tests/lmdb-corruption.js`) and tooling integration (`tests/lmdb-report-artifacts.js`). + +--- + +## Checklist coverage and required follow-ups + +### A) Schema & migrations + +**Audit** + +- SQLite schema is versioned via `PRAGMA user_version` with `SCHEMA_VERSION = 7` (`src/storage/sqlite/schema.js`). +- Incremental update explicitly checks schema version and required tables before mutating (`src/storage/sqlite/build/incremental-update.js`). +- Table-level constraints are generally well-defined (primary keys per (mode, …), plus supporting indexes for vocab/postings). + +**Gaps / issues** + +- [ ] **Fail-closed at read time:** Add a `user_version` gate to the SQLite reader path (at minimum in `src/retrieval/cli-sqlite.js` / sqlite backend creation). + - Desired behavior: + - If backend is *forced* to SQLite: throw a clear error (“SQLite schema mismatch: expected X, found Y”). + - If backend is not forced (auto): treat SQLite as unavailable and fall back to the file-backed backend, with a warning. +- [ ] **Index alignment with hot predicates:** Consider adding `CREATE INDEX idx_chunks_file_id ON chunks(mode, file, id)` to support: + - `SELECT id FROM chunks WHERE mode=? AND file=? ORDER BY id` + - `SELECT file, id FROM chunks WHERE mode=? ORDER BY file, id` (incremental update id reuse scan) +- [ ] **Document upgrade path explicitly:** The system is effectively “rebuild on schema bump”. Ensure docs and user-facing error messaging make that explicit (and fail closed rather than attempting to limp on). +- [ ] **Consider column-level schema validation for critical tables** (optional but recommended): required-table-name checks do not catch incompatible column changes if a user provides an arbitrary SQLite file containing tables with the right names. 
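+A minimal sketch of the fail-closed gate plus the optional column check (the import path, option names, and the `chunks` column list are assumptions):
+
+```js
+import Database from 'better-sqlite3';
+// Import path and SCHEMA_VERSION symbol assumed from src/storage/sqlite/schema.js.
+import { SCHEMA_VERSION } from '../storage/sqlite/schema.js';
+
+export function openSqliteIndex(dbPath, { forced = false } = {}) {
+  const db = new Database(dbPath, { readonly: true, fileMustExist: true });
+  const found = db.pragma('user_version', { simple: true });
+  if (found !== SCHEMA_VERSION) {
+    db.close();
+    if (forced) {
+      throw new Error(`SQLite schema mismatch: expected ${SCHEMA_VERSION}, found ${found}`);
+    }
+    return null; // auto mode: caller falls back to the file-backed backend
+  }
+  // Optional column-level check for a critical table (column names illustrative).
+  const columns = new Set(db.pragma('table_info(chunks)').map((col) => col.name));
+  for (const required of ['id', 'mode', 'file']) {
+    if (!columns.has(required)) {
+      db.close();
+      throw new Error(`SQLite table 'chunks' is missing required column '${required}'`);
+    }
+  }
+  return db;
+}
+```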
+ +--- + +### B) SQLite build pipeline + +**Audit** + +- Build-from-artifacts path uses bulk inserts and creates secondary indexes after ingest (`src/storage/sqlite/build/from-artifacts.js`). +- Build-from-bundles supports a fast-path using bundle workers (`src/storage/sqlite/build/from-bundles.js` + `bundle-loader.js`). +- Validation includes `PRAGMA integrity_check` (full) and cross-table count consistency checks (`src/storage/sqlite/build/validate.js`). + +**Gaps / issues** + +- [ ] **(P0) Fix ANN insert statement preparation when the ANN table already exists:** + - In `src/storage/sqlite/build/from-bundles.js`: + - When `hasVectorTable` is true (L120), prepare `insertVectorAnn` immediately (same SQL as the “created table” path near L209). + - In `src/storage/sqlite/build/incremental-update.js`: + - When `vectorAnnReady` is set based on `hasVectorTable` (L240), prepare `insertVectorAnn` as well. + - Add a CI-friendly unit test that does not require a real sqlite-vec binary (see “Tests” section below). +- [ ] **(P0) Enforce embedding dims consistency in bundle builds.** + - Recommendation: pre-scan each bundle (or the whole manifest) to ensure all embeddings are either absent or have a single consistent dimension; then hard-fail the build if mismatched. + - Current behavior: warns once around L197 and continues; this should be tightened to match the artifacts build path which throws on mismatch. +- [ ] **Failure cleanup should include SQLite sidecars** (`.db-wal`, `.db-shm`) in: + - `src/storage/sqlite/build/from-artifacts.js` + - `src/storage/sqlite/build/from-bundles.js` + + Today they remove only `outPath` on failure. If WAL/SHM exist, they can be left behind as confusing debris and can interfere with subsequent runs. +- [ ] **Consider ensuring the produced DB is “single-file”** after build by checkpointing/truncating WAL (or switching journal mode back), rather than relying on implicit behavior. +- [ ] **Prepared statement churn:** `deleteDocIds(...)` dynamically prepares multiple statements per chunk; consider statement caching keyed by chunk size to reduce overhead during large deletes. + +--- + +### C) LMDB backend + +**Audit** + +- LMDB has a clear key-space separation (`meta:*`, `artifact:*`) and an explicit schema version (`src/storage/lmdb/schema.js`). +- LMDB build tool stores artifacts plus metadata into LMDB (`tools/build-lmdb-index.js`). +- Corruption handling is at least partially validated via tests (`tests/lmdb-corruption.js`, `tests/lmdb-report-artifacts.js`). + +**Gaps / issues** + +- [ ] Ensure the LMDB *reader* path (not in this checklist set) fails closed on schema mismatch the same way SQLite incremental update does (explicit schema version check; clear error messaging). +- [ ] Consider adding a lightweight “LMDB quick check” command in tooling (or enhancing `tools/index-validate.js`) that validates the presence of all required keys (schema version, chunk meta, vocab, postings, etc.) and reports missing keys explicitly. +- [ ] Document LMDB key invariants and expected artifact presence (which artifacts are mandatory vs optional). + +--- + +### D) Incremental updates + +**Audit** + +- Incremental update gating exists (requires incremental manifest, rejects schema mismatch, rejects high change ratios) (`src/storage/sqlite/build/incremental-update.js`). +- It preserves doc-id stability per-file by reusing IDs for changed files and reusing free IDs from deletions. +- Deletes are applied across all relevant tables using `deleteDocIds(...)` with consistent table lists. 
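+For reference, the delete pattern above, combined with the statement caching that several items below recommend, can be sketched as follows (the table list and chunk size are illustrative):
+
+```js
+// Sketch only: real table names come from the shared table lists in delete.js.
+const DOC_TABLES = ['chunks', 'dense_vectors']; // illustrative subset
+const CHUNK_SIZE = 500; // stay well under SQLite's bound-parameter limit
+
+const stmtCache = new Map();
+const deleteStmt = (db, table, size) => {
+  const key = `${table}:${size}`;
+  if (!stmtCache.has(key)) {
+    const placeholders = Array(size).fill('?').join(', ');
+    stmtCache.set(key, db.prepare(`DELETE FROM ${table} WHERE id IN (${placeholders})`));
+  }
+  return stmtCache.get(key);
+};
+
+export function deleteDocIdsSketch(db, docIds) {
+  const run = db.transaction((ids) => {
+    for (let i = 0; i < ids.length; i += CHUNK_SIZE) {
+      const slice = ids.slice(i, i + CHUNK_SIZE);
+      for (const table of DOC_TABLES) {
+        deleteStmt(db, table, slice.length).run(...slice);
+      }
+    }
+  });
+  run(docIds);
+}
+```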
+ +**Gaps / issues** + +- [ ] **(P0) ANN table insertion bug** (same as in section B) must be fixed for incremental updates. +- [ ] **WAL lifecycle:** after an in-place incremental update, run: + - `PRAGMA wal_checkpoint(TRUNCATE);` + - optionally `PRAGMA journal_mode = DELETE;` (if the project prefers single-file DBs) + + This ensures the on-disk DB is not “dependent on sidecars” after the update and reduces the likelihood of later tooling accidentally discarding uncheckpointed state. +- [ ] **Manifest match logic:** `isManifestMatch(...)` falls back to mtime/size when one side has a hash and the other does not. + - Consider tightening: if an incremental manifest provides a hash but the DB manifest row does not, treat as “changed” and update the DB row hash (this gradually converges the DB to the stronger invariant). +- [ ] **Performance of doc-id reuse scan:** the “scan all chunks ordered by file,id” approach is correct but can be expensive; if it becomes a bottleneck, consider either: + - adding `(mode,file,id)` index, and/or + - materializing file→docId list in a side table (only if necessary). + +--- + +### E) Performance + +**Audit** + +- Build pragmas in `src/storage/sqlite/build/pragmas.js` are set to favor build throughput (WAL + relaxed synchronous) and are restored (partially). +- Compaction tool is designed to reduce doc-id sparsity and reclaim file size (`tools/compact-sqlite-index.js`). + +**Gaps / issues** + +- [ ] **Avoid repeated `COUNT(*)` scans** for backend auto-selection where possible (`src/storage/backend-policy.js`). + - Options: use `file_manifest` sum, maintain a meta counter, or store chunk count in `index_state.json`. +- [ ] **Improve maintenance query performance** via `(mode,file,id)` index as noted above. +- [ ] **Reduce query-time statement re-preparation** in `src/retrieval/sqlite-helpers.js` (`chunkArray(...)` creates fresh SQL each time); consider caching by chunk size. +- [ ] **Add at least one p95 query latency regression test** using a stable fixture DB (details below). + +--- + +### F) Refactoring goals + +**Audit** + +- The codebase already separates schema SQL, prepared statements, and build/validate logic into dedicated modules. + +**Gaps / issues** + +- [ ] **De-duplicate shared helpers:** + - `updateIndexStateManifest(...)` exists in both `tools/build-lmdb-index.js` and `tools/build-sqlite-index/index-state.js`. + - `chunkArray(...)` exists in both build and retrieval code (or adjacent helpers). +- [ ] **Centralize ANN table setup logic** so that “table exists” vs “table created” paths always prepare the insert statement (avoid the current drift between `prepareVectorAnnTable(...)` and the bundle/incremental paths). +- [ ] **Clarify naming:** `toVectorId(...)` is currently a “coerce to BigInt” helper; consider renaming to reflect that it does not encode/transform the id. + +--- + +## Tests and benchmarks — required additions + +### Must-add tests (CI-friendly) + +- [ ] **Unit test: ANN insertion when the ANN table already exists** (no real extension binary required). + - Approach: + - Create a temporary SQLite DB with all required tables plus a *plain* `dense_vectors_ann` table (not virtual) matching the schema used by insert/delete (`rowid` + `embedding` BLOB column). 
+ - Pass a mocked `vectorConfig` into `incrementalUpdateDatabase(...)` with: + - `loadVectorExtension: () => ({ ok: true })` + - `hasVectorTable: () => true` + - `encodeVector: () => Buffer.from([0])` (or similar stable stub) + - Run an incremental update that modifies at least one file and assert that: + - rows are deleted for removed docIds + - rows are inserted/replaced for changed docIds +- [ ] **Unit test: bundle-build dims mismatch hard failure** + - Create two bundle files in the incremental bundle dir: one with embedding length N, one with embedding length N+1. + - Assert build fails (or returns count 0 with a clear reason) rather than “warn and continue”. + +### Additional recommended tests + +- [ ] **Reader fail-closed test:** Provide a DB with `user_version != SCHEMA_VERSION` and confirm: + - forced SQLite backend errors clearly + - auto backend falls back without using SQLite. +- [ ] **Incremental WAL checkpoint test** (if WAL checkpointing is implemented): verify that after incremental update: + - no `*.db-wal` / `*.db-shm` remain (or WAL is truncated to a small size, depending on desired policy). + +### Benchmark / regression testing + +- [ ] **p95 query latency regression guard (fixture-based)** + - Add a small but non-trivial fixture SQLite DB (or build it deterministically during test setup) and run a representative query workload: + - candidate generation (ngrams) + - FTS ranking (if enabled) + - dense vector scoring (if enabled) + - Measure per-query durations and assert p95 stays under a budget (or does not regress beyond a tolerance vs a baseline). + - Keep it deterministic: single-threaded, warm cache (or explicit warm-up iterations), fixed query set, fixed limits. + +--- + +## File-by-file findings and action items + +> This section lists concrete issues and improvement opportunities per reviewed file. +> Items are written as actionable checkboxes; severity tags (P0/P1/P2) are included where appropriate. + +### `src/storage/backend-policy.js` + +- [ ] Clarify threshold semantics for `autoSqliteThresholdChunks` / `autoSqliteThresholdBytes` when set to `0` (current code uses `> 0`, so `0` behaves like “disabled” rather than “always use SQLite”). +- [ ] Consider avoiding expensive `COUNT(*)` scans for auto-selection; store chunk count in a meta table or `index_state.json` and read that instead (or sum `file_manifest.chunk_count`). +- [ ] Consider logging/telemetry: when auto-select declines SQLite due to missing/invalid thresholds, surface that decision (currently it is silent except for return fields). + +### `src/storage/lmdb/schema.js` + +- [ ] Add brief inline documentation describing key-space expectations (which keys must exist for a usable LMDB index). +- [ ] Consider adding a helper to enumerate expected artifact keys for validation tooling (to avoid drift). + +### `src/storage/sqlite/build-helpers.js` + +- [ ] Ensure `vectorConfig.extension.table` / `.column` are always sanitized before being interpolated into SQL (call-site currently depends on the caller to sanitize). +- [ ] Consider making `buildChunkRow(...)` treat empty strings/arrays consistently (e.g., avoid turning `''` into `null` unintentionally for fields where empty-string is meaningful). +- [ ] Consider reducing confusion: `buildChunkRow(...)` returns fields (`signature`, `doc`) that are not inserted into `chunks` but only into `chunks_fts`. + +### `src/storage/sqlite/build/bundle-loader.js` + +- [ ] Ensure loader failures return actionable error messages (bundle path, reason). 
(Current errors are decent; confirm `readBundleFile(...)` includes enough context.) +- [ ] Consider exposing a small “max in-flight bundles” safeguard if worker threads are enabled (to avoid memory spikes on extremely large bundles). + +### `src/storage/sqlite/build/delete.js` + +- [ ] Cache delete statements by chunk size to reduce repeated `db.prepare(...)` overhead when deleting many docIds. +- [ ] Consider supporting a temp table approach (`CREATE TEMP TABLE ids(...)`) if deletion performance becomes a bottleneck for large deletes. +- [ ] Verify that the `vectorDeleteTargets` contract remains consistent across callers (column name `rowid` vs explicit id columns). + +### `src/storage/sqlite/build/from-artifacts.js` + +- [ ] Tighten shard discovery: `listShardFiles(...)` includes `.jsonl` but ingestion reads shards via `readJson(...)`; either: + - restrict token-postings shards to `.json`, or + - add JSONL support for token-postings shards (if they can be JSONL in practice). +- [ ] Consider inserting `dense_meta` inside the same transaction as the first dense-vector batch (atomicity / consistency). +- [ ] For `chunkMeta` ingestion (non-piece path), avoid building a single giant `rows` array in memory if the artifact can be large; use chunked batching as done in `ingestChunkMetaPieces(...)`. +- [ ] Failure cleanup: remove sidecars (`outPath-wal`, `outPath-shm`) as well as `outPath` on failure. + +### `src/storage/sqlite/build/from-bundles.js` + +- [ ] **(P0) Prepare `insertVectorAnn` even when the ANN table already exists** (see around L120). + The “table exists” branch sets `vectorAnnReady = true` but does not prepare the insert statement, so embeddings are not inserted into ANN. +- [ ] **(P0) Make embedding dims mismatch a hard failure.** + Current warning-only behavior (around L197) can produce inconsistent dense vectors. +- [ ] Guard against malformed bundles: `count += result.bundle.chunks.length` should handle missing/invalid `chunks` gracefully (use `?.length || 0`). +- [ ] Remove unused import (`path` is currently imported but not used). +- [ ] Failure cleanup should remove SQLite sidecars, not just the DB file. + +### `src/storage/sqlite/build/incremental-update.js` + +- [ ] **(P0) Prepare `insertVectorAnn` when the ANN table already exists** (see around L240). + Without this, incremental updates delete ANN rows but do not reinsert replacement vectors. +- [ ] Add explicit WAL checkpointing/truncation at the end of a successful update (to keep the DB self-contained and avoid large WAL growth). +- [ ] Consider tightening `isManifestMatch(...)` semantics when hashes are available on only one side (to converge DB manifest quality). +- [ ] Performance: consider `(mode,file,id)` index or other optimization for `getDocIdsForFile(...)` scanning and per-file id lists. +- [ ] Remove (or convert to assertion) the redundant “dims mismatch warn” path inside applyChanges; dims mismatch should already be rejected earlier. + +### `src/storage/sqlite/build/manifest.js` + +- [ ] De-duplicate `conflicts` output (currently can include repeated normalized paths). +- [ ] Consider strict hash preference: if `entry.hash` is present but `dbEntry.hash` is null, treat as mismatch and update DB hash (do not silently match on mtime/size). + +### `src/storage/sqlite/build/pragmas.js` + +- [ ] Consider restoring `journal_mode` (or explicitly checkpointing) after build to ensure “single-file DB” invariants if the project expects that. +- [ ] Consider surfacing pragma failures (currently swallowed silently). 
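+The checkpoint step itself is small; a sketch, assuming better-sqlite3:
+
+```js
+import Database from 'better-sqlite3';
+
+const db = new Database('index.db'); // path illustrative
+// ... build or incremental update work happens here ...
+db.pragma('wal_checkpoint(TRUNCATE)'); // fold WAL contents back into the main file
+// Optional, only if the project standardizes on single-file DBs:
+// db.pragma('journal_mode = DELETE');
+db.close();
+```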
+ +### `src/storage/sqlite/build/statements.js` + +- [ ] Consider adding `idx_chunks_file_id` (see schema/index alignment notes). +- [ ] Reduce confusion: `buildChunkRowWithMeta(...)` populates fields not present in the schema (e.g., `churn_added`, `churn_deleted`, `churn_commits`). Either: + - add these columns to the schema if they are intended, or + - stop emitting them to avoid “looks supported but isn’t”. + +### `src/storage/sqlite/build/validate.js` + +- [ ] Consider validating ANN invariants when ANN is enabled: + - `dense_vectors_ann` row count should match `dense_vectors` row count for the mode (or at least have no orphans). +- [ ] Consider making full `integrity_check` optional for very large DBs (it can be expensive); provide a quick-check mode and/or configurable validation levels. + +### `src/storage/sqlite/build/vocab.js` + +- [ ] Consider caching prepared statements by chunk size (similar to delete/vocab fetch) to reduce repeated SQL compilation overhead. +- [ ] Error messaging: if `missing.length` is huge, cap printed missing values in the thrown error and include only a sample plus counts (to avoid megabyte-scale exception strings). + +### `src/storage/sqlite/incremental.js` + +- [ ] Document the on-disk incremental manifest contract and failure modes (missing manifest, conflicts, ratio guard). +- [ ] Consider adding a small helper to validate the incremental manifest shape early, with clearer error output. + +### `src/storage/sqlite/schema.js` + +- [ ] Consider adding `(mode,file,id)` index for maintenance queries. +- [ ] Ensure docs (`docs/sqlite-index-schema.md`) stay in sync when schema changes. + +### `src/storage/sqlite/utils.js` + +- [ ] `normalizeFilePath(...)` returns the input unchanged when it is not a string; consider returning `null` instead to reduce accidental “undefined as key” behavior. +- [ ] `replaceSqliteDatabase(...)`: consider logging when fallback rename/remove paths are taken (debuggability of replacement failures). + +### `src/storage/sqlite/vector.js` + +- [ ] `toVectorId(...)` is effectively “coerce to BigInt”; consider renaming to reflect that (e.g., `toSqliteRowidInt64(...)`) to avoid implying a non-trivial mapping. +- [ ] Consider making quantization parameters (`minVal`, `maxVal`) configurable or derived from embedding model metadata (avoid silent saturation if embeddings are out of range). + +--- + +### Tooling files + +#### `tools/build-lmdb-index.js` + +- [ ] Consider a `--validate` option that checks required artifacts exist before writing LMDB (fail early, clearer errors). +- [ ] Consider writing a small LMDB “manifest” key listing which artifacts were written (enables tool-side validation and reduces drift). + +#### `tools/build-sqlite-index.js` + +- [ ] Consider exit codes and messaging consistency across build modes (full rebuild vs incremental vs skipped). + +#### `tools/build-sqlite-index/cli.js` + +- [ ] Consider validating incompatible flag combinations early (e.g., `--bundle-workers` without a bundle dir). +- [ ] Consider adding `--no-compact` / `--compact` clarity in CLI help (if not already covered elsewhere). + +#### `tools/build-sqlite-index/index-state.js` + +- [ ] De-duplicate `updateIndexStateManifest(...)` with the LMDB equivalent; extract to a shared helper module. +- [ ] Consider including schema version and build mode (full vs incremental) in `index_state.json` for observability. 
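+Stepping back to the `validate.js` item above, the ANN invariant can be checked cheaply. A sketch, assuming the plain-table `dense_vectors_ann` layout used by the CI-friendly mock (a real virtual table may need per-mode or orphan queries instead):
+
+```js
+export function checkAnnConsistency(db) {
+  const dense = db.prepare('SELECT COUNT(*) AS n FROM dense_vectors').get().n;
+  const ann = db.prepare('SELECT COUNT(*) AS n FROM dense_vectors_ann').get().n;
+  if (dense !== ann) {
+    throw new Error(`ANN table out of sync: dense_vectors=${dense}, dense_vectors_ann=${ann}`);
+  }
+}
+```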
+ +#### `tools/build-sqlite-index/run.js` + +- [ ] Ensure `stopHeartbeat()` is always invoked via `try/finally` (avoid leaking an interval on error when `exitOnError=false`). +- [ ] After incremental updates, consider forcing WAL checkpoint/truncate (see incremental update section). +- [ ] Consider making the “incremental fallback to rebuild” reason more explicit in output (currently logged, but could include key stats: changedFiles, deletedFiles, ratio). + +#### `tools/build-sqlite-index/temp-path.js` + +- [ ] Consider a “same filesystem guarantee” note: temp DB path must be on same filesystem for atomic rename (current implementation uses same directory, which is good; document this). + +#### `tools/clean-artifacts.js` + +- [ ] Consider adding a `--dry-run` option that prints what would be deleted without deleting it (safety for new users). + +#### `tools/compact-sqlite-index.js` + +- [ ] If vector extension is enabled but cannot be loaded, consider warning that compaction may drop ANN acceleration (and suggest remediation, e.g. rerun embeddings rebuild once extension is available). +- [ ] Consider recording pre/post compaction stats into `index_state.json` (bytes, row counts) for observability. + +#### `tools/download-extensions.js` + +- [ ] Consider streaming zip extraction rather than buffering each entry into memory (`adm-zip` forces buffer extraction; if large binaries become common, consider a streaming zip library). +- [ ] Consider setting file permissions for extracted binaries explicitly per-platform conventions (e.g., preserve exec bit if needed, although shared libraries typically do not require it). + +#### `tools/index-validate.js` + +- [ ] Consider including actionable remediation hints per failure mode (e.g., “run build-index”, “run build-sqlite-index”, “run download-extensions”). + +#### `tools/report-artifacts.js` + +- [ ] Consider clarifying the units in output when printing both formatted size and raw bytes (currently raw bytes are printed in parentheses without a label). + +#### `tools/vector-extension.js` + +- [ ] Consider keying `loadCache` by (db, config) rather than only db (avoids surprising behavior if config changes during a long-lived process). +- [ ] Consider restoring prior `trusted_schema` value after `ensureVectorTable(...)` (minimize global DB setting changes). + +#### `tools/verify-extensions.js` + +- [ ] Consider adding a quick “smoke query” that verifies the ANN table can be created and queried (optional). + +--- + +### Test files + +#### `tests/backend-policy.js` + +- [ ] Add coverage for threshold edge cases (e.g., `autoSqliteThresholdChunks=0` semantics). +- [ ] Add a test case where SQLite exists but artifact metadata cannot be read (ensure fallback behavior is correct and reason is surfaced). + +#### `tests/compact-pieces.js` + +- [ ] No issues noted (acts as a compaction functional check for artifact pieces). + +#### `tests/lmdb-backend.js` + +- [ ] Consider adding schema version mismatch coverage (fail closed when schema version differs). + +#### `tests/lmdb-corruption.js` + +- [ ] Consider asserting on error message content to ensure corruption reporting remains actionable. + +#### `tests/lmdb-report-artifacts.js` + +- [ ] Consider adding a test for “missing required key” vs “corruption” differentiation (if validation tooling can distinguish). + +#### `tests/retrieval-backend-policy.js` + +- [ ] Add coverage for schema version mismatch fallback (once reader-side user_version check exists). 
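+A sketch of that fallback test (the `openSqliteIndex` gate and its import path are hypothetical until the reader-side check lands):
+
+```js
+import test from 'node:test';
+import assert from 'node:assert/strict';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import Database from 'better-sqlite3';
+// Hypothetical fail-closed gate sketched earlier in this review.
+import { openSqliteIndex } from '../src/retrieval/open-sqlite-index.js';
+
+test('reader falls back when user_version does not match', () => {
+  const dbPath = join(tmpdir(), `mismatch-${Date.now()}.db`);
+  const db = new Database(dbPath);
+  db.pragma('user_version = 9999'); // deliberately wrong schema version
+  db.close();
+
+  assert.equal(openSqliteIndex(dbPath, { forced: false }), null);
+  assert.throws(() => openSqliteIndex(dbPath, { forced: true }), /schema mismatch/i);
+});
+```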
+ +#### `tests/smoke-sqlite.js` + +- [ ] Add coverage for `user_version` mismatch behavior once implemented. + +#### `tests/sqlite-ann-extension.js` + +- [ ] Add a CI-friendly companion test that does not require the real extension binary (mock vectorConfig approach described above) to ensure ANN insert/delete invariants are enforced in CI. + +#### `tests/sqlite-ann-fallback.js` + +- [ ] Consider adding explicit coverage that fallback ANN search never returns out-of-range docIds (robustness guard). + +#### `tests/sqlite-auto-backend.js` + +- [ ] Add a test that covers the “SQLite present but too small” path + verifies reason reporting is stable. + +#### `tests/sqlite-build-delete.js` + +- [ ] Add coverage for deleting from an ANN table using `rowid` column and BigInt inputs (ensures `toVectorId(...)` conversion remains correct). + +#### `tests/sqlite-build-indexes.js` + +- [ ] Add coverage for any new maintenance index (e.g., `(mode,file,id)`), if introduced. + +#### `tests/sqlite-build-manifest.js` + +- [ ] Add a test for “manifest has hash but DB does not” semantics (once tightened). + +#### `tests/sqlite-build-vocab.js` + +- [ ] Add stress coverage for token sets larger than SQLite’s `IN` limit (ensuring chunking logic remains correct). + +#### `tests/sqlite-bundle-missing.js` + +- [ ] Add bundle-shape validation coverage (missing `chunks` field should not crash build loop). + +#### `tests/sqlite-cache.js` + +- [ ] No issues noted (validates cache path behavior / read path). + +#### `tests/sqlite-chunk-id.js` + +- [ ] No issues noted (docId/chunkId behavior). + +#### `tests/sqlite-compact.js` + +- [ ] Consider adding coverage for compaction with ANN enabled but extension mocked (ensures dense_vectors_ann remains consistent after compaction). + +#### `tests/sqlite-incremental-no-change.js` + +- [ ] Consider verifying `index_state.json` is unchanged (or only updated timestamp changes), depending on desired policy. + +#### `tests/sqlite-incremental.js` + +- [ ] Add coverage for doc-id reuse behavior (free-list) to prevent accidental regression to “always append”. + +#### `tests/sqlite-index-state-fail-closed.js` + +- [ ] Consider adding coverage that “pending” flips back to false on successful build (already implied but could be explicit). + +#### `tests/sqlite-missing-dep.js` + +- [ ] No issues noted (validates better-sqlite3 missing behavior). + +#### `tests/sqlite-sidecar-cleanup.js` + +- [ ] Add incremental-update sidecar cleanup coverage if WAL checkpointing/truncation is implemented. + +--- + +### Documentation files + +#### `docs/contracts/sqlite.md` + +- [ ] Explicitly document the `user_version` contract and the “fail closed / rebuild on mismatch” behavior. +- [ ] Ensure the list of required tables aligns with the actual reader/build code paths (and clearly separate “core” vs “optional” tables). + +#### `docs/external-backends.md` + +- [ ] Consider updating to reflect current backend-policy behavior (auto selection thresholds, forced backend semantics). + +#### `docs/model-compare-sqlite.json`, `docs/parity-sqlite-ann.json`, `docs/parity-sqlite-fts-ann.json` + +- [ ] Ensure these reports are either generated artifacts (and documented as such) or kept in sync with the current schema/tooling versions (otherwise they can mislead). + +#### `docs/references/dependency-bundle/deps/better-sqlite3.md` + +- [ ] Confirm documented behavior matches current runtime expectations (particularly around extension loading, platform binaries, and supported SQLite features). 
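+A smoke check covering both the extension-loading behavior above and the `verify-extensions` “smoke query” idea (the binary path is illustrative; `vec_version()` is sqlite-vec's version function):
+
+```js
+import Database from 'better-sqlite3';
+
+const db = new Database(':memory:');
+try {
+  // Path is the platform-specific output of tools/download-extensions.js (illustrative).
+  db.loadExtension('./extensions/vec0');
+  // Any cheap query proves the load worked.
+  const row = db.prepare('SELECT vec_version() AS version').get();
+  console.log('sqlite-vec loaded:', row.version);
+} catch (err) {
+  console.warn('ANN extension unavailable; falling back to non-ANN search:', err.message);
+} finally {
+  db.close();
+}
+```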
+ +#### `docs/sqlite-ann-extension.md` + +- [ ] Document the invariant that `dense_vectors_ann` must remain consistent with `dense_vectors` (no orphans; same cardinality per mode when enabled). +- [ ] Document how incremental updates maintain the ANN table (and note limitations when extension is not available). + +#### `docs/sqlite-compaction.md` + +- [ ] Clarify how compaction interacts with the ANN extension table (and the remediation path if ANN is temporarily unavailable during compaction). + +#### `docs/sqlite-incremental-updates.md` + +- [ ] Update doc-id behavior description to match implementation (per-file id reuse + free-list reuse rather than always appending). +- [ ] Document the ratio guard behavior and fallback to full rebuild more explicitly. +- [ ] Document WAL/sidecar expectations for incremental updates (single-file vs WAL sidecars). + +#### `docs/sqlite-index-schema.md` + +- [ ] Reconfirm schema matches `SCHEMA_VERSION = 7` (columns, indexes, optional extension table). +- [ ] If `(mode,file,id)` index is added, document it as a maintenance/performance index. + +--- + +## Exit criteria for this review section + +The following items should be completed to consider “Review Section 7” fully addressed: + +- [ ] ANN insert-preparation bug fixed in both bundle-build and incremental-update code paths. +- [ ] Reader-side schema version fail-closed behavior implemented and tested. +- [ ] Bundle-build embedding dims mismatch becomes a hard failure (with tests). +- [ ] WAL/sidecar policy is explicitly decided, implemented consistently, and documented (at minimum for incremental updates). +- [ ] At least one CI-friendly test covers ANN table sync invariants without requiring a real extension binary. +- [ ] At least one fixture-based p95 latency regression test is added (or an equivalent deterministic perf guard). + +--- + +--- + +# Phase 34 — Phase 2/3/4/5/6 verification gates + +**Objective:** run and gate the regression tests that confirm Phase 2 contract alignment, Phase 3 chunking invariants, Phase 4 retrieval semantics, Phase 5 durability, and Phase 6 embeddings correctness. + +## 34.1 CLI flag removal and error handling +- [ ] `tests/search-removed-flags.js` + - [ ] Failure: Expected actionable error for --human. + - [ ] Log: `logs/phase-22/search-removed-flags.log:1` +- [ ] `tests/search-missing-flag-values.js` + - [ ] Failure: Expected missing value message for --type. + - [ ] Log: `logs/phase-22/search-missing-flag-values.log:1` + +## 34.10 Phase 9 CI gating + flaky test recovery +- [ ] `tests/script-coverage.js` + - [ ] Failure: Error: unsafe tar entry: C:/Users/sneak/Development/PairOfCleats_CODEX/tests/.cache/download-extensions/tar/.tmp/extract-1768204937568/vec0.dll + - [ ] Log: `tests/.logs/2026-01-12T08-02-14-028Z/download-extensions-test.attempt-3.log:15` + +## 34.11 Phase 10 modularization regression sweep +- [ ] `tests/search-help.js` + - [ ] Failure: Help output missing flag: --calls. + - [ ] Log: `logs/phase-22/search-help.log:1` + +## 34.12 Phase 11 docs/help parity checks +- [ ] `tests/search-help.js` + - [ ] Failure: Help output missing flag: --calls. + - [ ] Log: `logs/phase-22/search-help.log:1` +- [ ] `tests/search-removed-flags.js` + - [ ] Failure: Expected actionable error for --human. + - [ ] Log: `logs/phase-22/search-removed-flags.log:1` + +## 34.29 file processor skip +- [ ] `tests/file-processor/skip.test.js` + - [ ] Failure: Expected binary buffer to skip with reason=binary. 
+ - [ ] Log: `logs/phase-22/file-processor-skip.log:1` + +## 34.32 lang js chunking +- [ ] `tests/lang/js-chunking.test.js` + - [ ] Failure: Missing exported function chunk (alpha). + - [ ] Log: `logs/phase-22/lang-js-chunking.log:1` + +## 34.34 lang js relations +- [ ] `tests/lang/js-relations.test.js` + - [ ] Failure: Missing exports for run/default: []. + - [ ] Log: `logs/phase-22/lang-js-relations.log:1` + +## 34.38 language registry collectors +- [ ] `tests/language-registry/collectors.test.js` + - [ ] Failure: dockerfile mismatch: ["node:18"] !== ["base","node:18"]. + - [ ] Log: `logs/phase-22/language-registry-collectors.log:1` + +**Exit criteria** +- [ ] All verification tests pass. + +--- diff --git a/HISTORIC_COMPLETED_PHASES.md b/HISTORIC_COMPLETED_PHASES.md new file mode 100644 index 000000000..0b7c4be95 --- /dev/null +++ b/HISTORIC_COMPLETED_PHASES.md @@ -0,0 +1,2918 @@ +# Completed phases + +# Phase 1 — Truth Alignment, Spec Freeze, and Correctness Harness + +**Objective:** Establish the authoritative definition of “what the tool does,” then encode it into tests, validations, and reproducible fixtures so every subsequent phase is measurable. + +## 1.1 Feature truth table (claims → evidence → tests → limitations) + +### Dependency guidance (best choices) +- `ajv` — model the truth table itself as a JSON Schema and **validate it in CI** so the “claims → evidence → tests → limits” ledger can’t silently drift. + - Compile schemas once at startup (`new Ajv({ strict: true, allErrors: true })`), not per file/run. +- `jsonc-parser` — if feature flags or config files are JSONC, use offset-aware parsing (`getLocation`, `parseTree`) so you can attach *precise* diagnostics to a feature claim. +- `semver` — version every claim bundle and feature gate using semver ranges rather than ad-hoc strings. + +- [x] Build `docs/truth-table.md` that covers: + - [x] Build modes: code / prose / records / mixed + - [x] Chunking rules (by language and file type) + - [x] Tokenization semantics (code vs prose) + - [x] Index artifact outputs (memory + sqlite + shard formats) + - [x] Search semantics (filters, scoring, explain) + - [x] Enrichment outputs (risk, types, relations, git) + - [x] Service/API/MCP behavior (contracts, stability expectations) +- [x] For each claim: + - [x] link to implementation module(s) + - [x] list configuration toggles + - [x] list known limitations / failure modes + - [x] identify a fixture-based test that demonstrates it + +## 1.2 Acceptance-test fixtures and golden expectations + +### Dependency guidance (best choices) +- `seedrandom` — make all randomized fixture generation deterministic (seed = repo hash + test name), so flaky “random repos” never block correctness gates. +- `xxhash-wasm` — use fast, stable hashing to derive fixture IDs and to detect unintended fixture drift (hash raw inputs + normalized outputs). 
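+A sketch of how the two combine (inputs are placeholders; `h64ToString` comes from an initialized `xxhash-wasm` instance):
+
+```js
+import seedrandom from 'seedrandom';
+import xxhash from 'xxhash-wasm';
+
+// Initialize the WASM hasher once and reuse it (cheap to call, costly to create).
+const { h64ToString } = await xxhash();
+
+const repoHash = 'abc123'; // placeholder: hash of the fixture repo inputs
+const testName = 'small-mixed-repo'; // placeholder: the test deriving the fixture
+
+const fixtureId = h64ToString(`${repoHash}:${testName}`);
+const rng = seedrandom(fixtureId);
+
+// Every "random" choice in fixture generation flows from the seeded rng.
+const pick = (items) => items[Math.floor(rng() * items.length)];
+```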
+ +- [x] Add fixture repos representing: + - [x] small: <1k files mixed code/prose + - [x] medium: 5k–50k files with mixed languages + - [x] multi-language mixed-file repo (HTML+JS+CSS, markdown code fences, etc) +- [x] Define “must-hit” retrieval assertions: + - [x] symbol lookup (name/kind) + - [x] structural filters (e.g., `--kind`, `--signature`, `--decorator`) + - [x] risk filter behavior (even if basic initially) + - [x] type inference visibility (even if minimal initially) + +## 1.3 Tool invocation correctness (install-root vs repo-root) + +### Dependency guidance (best choices) +- `execa` — standardize all subprocess calls (git, node, pnpm) with robust quoting, streaming output capture, timeouts, and non-throwing exit handling. + - Prefer `reject: false` and check `exitCode` explicitly; capture `stdout`, `stderr`, and combined `all` output. +- `semver` — validate runtime/tool versions (Node, npm/pnpm, optional native deps) and emit actionable errors early. + +- [x] Implement and require a single resolver: + - [x] `resolveToolRoot()` (ESM-safe, based on `import.meta.url`) + - [x] `resolveRepoRoot()` (explicit > inferred; deterministic) +- [x] Convert *all* scripts that spawn other scripts/tools to use toolRoot resolution. +- [x] Add tests that run commands from a directory **outside** repoRoot. + +## 1.4 Determinism and reproducibility baseline + +### Dependency guidance (best choices) +- `seedrandom` — seed any randomized ordering (file traversal, shard selection, benchmark query generation). +- `xxhash-wasm` — deterministic hashing for chunk IDs and segment IDs; avoid crypto hashes unless explicitly required. +- `msgpackr` — if you snapshot intermediate artifacts for determinism tests, prefer MsgPack for speed and stable binary outputs. + +- [x] Ensure build artifacts include: + - [x] tool version, node version, OS, effective config hash + - [x] repo provenance (git commit + dirty flag when available) +- [x] Establish a deterministic test mode: + - [x] deterministic embedding stub (by default in tests) + - [x] deterministic ordering everywhere (files, shards, chunk IDs) + +**Deliverables** +- `docs/truth-table.md` +- fixture repos + goldens +- installed-package E2E test suite + +**Exit criteria** +- Tier-1 E2E tests pass reliably (Linux) and are reproducible locally. +- “Truth table” coverage: every user-visible feature claim has a test or explicit limitation. + +--- + +# Phase 2 — Artifact Contracts, Metadata Schema v2, and Atomic Build Durability + +**Objective:** Make artifacts and metadata self-describing, versioned, validated, and crash-safe. + +## 2.1 Artifact contract (schema + invariants) + +### Dependency guidance (best choices) +- `ajv` — enforce artifact schema invariants (index file, shard manifests, metadata v2, benchmark outputs) as a hard gate. + - Consider Ajv standalone validation for hot-path validation during large builds (generate validators once). +- `msgpackr` — use for compact, fast serialization of intermediate shard artifacts (especially metadata-rich chunks). + - Prefer a versioned envelope (magic bytes + schema version + codec version) so upgrades are safe. +- `fflate` — compress large artifacts (shards, posting lists) with streaming APIs to avoid event-loop stalls. +- `xxhash-wasm` — compute stable content hashes and IDs efficiently; cache initialized WASM instance and reuse. +- `roaring-wasm` (optional but high ROI) — represent posting lists and large ID sets as compressed bitmaps for fast intersection/union. 
+ - Explicitly call `dispose()` on bitmaps to avoid WASM memory growth. +- `better-sqlite3` — if SQLite is a backend, standardize on prepared statements + WAL mode + transactional writes for durability. +- `lmdb` (optional) — consider as an alternative backend for very high write throughput; gate behind optional dependency/feature flag (install friction). + +- [x] Define/refresh `docs/artifact-contract.md`: + - [x] every artifact file + format + version + - [x] required fields + optional fields + - [x] invariants (cross-artifact) and validation rules +- [x] Strengthen `tools/index-validate`: + - [x] schema validation per artifact + - [x] cross checks: chunk IDs, file references, postings references, embedding references + - [x] human remediation hints for each failure class + +## 2.2 **Metadata schema v2** (rich per-chunk metadata contract) + +### Dependency guidance (best choices) +- `ajv` — treat **Metadata Schema v2** as the canonical contract. + - Encode “required when …” rules as schema + additional runtime checks (Ajv can’t express every cross-field invariant cleanly). +- `semver` — version metadata schema independently from the index container version; negotiate reader compatibility. + +This is the foundation for advanced rich metadata, risk flows, and type inference. + +- [x] Create `docs/metadata-schema-v2.md` defining: + - [x] stable core: `chunkId`, `file`, `segment`, `range`, `lang`, `ext`, `kind`, `name` + - [x] provenance: `generatedBy`, `tooling`, `parser`, versions + - [x] doc metadata: signature, docstring/doc-comments, annotations, decorators/attributes + - [x] control-flow summary: branches/loops/returns/throws/awaits/async/generator + - [x] dataflow summary: reads/writes/mutates/aliases (local first; later cross-file) + - [x] dependencies: imports, referenced modules, includes + - [x] risk metadata: sources/sinks/sanitizers/flows (+ confidence) + - [x] type metadata: declared/inferred/tooling (+ confidence) + - [x] embedded metadata: segment parent, embedded language, embedding context +- [x] Define compatibility rules with existing `docmeta`: + - [x] migration mapping from current fields to v2 fields + - [x] deprecation schedule for legacy keys + +## 2.3 Atomic build and “current” pointer + +### Dependency guidance (best choices) +- `better-sqlite3` — implement “current pointer” and multi-stage build state updates as **atomic transactions**. + - Use WAL journaling; keep write transactions short and bounded. +- `fflate` — if “current pointer” points at compressed shard bundles, stream compress/decompress rather than buffering whole bundles. + +- [x] Build to staging directory `builds//...` (default format: `YYYYMMDDTHHMMSSZ__`) +- [x] Validate staging artifacts before promoting to “current” +- [x] Ensure readers never see partial outputs: + - [x] atomic rename/swap semantics + - [x] sqlite temp file + rename + - [x] shard manifest atomicity + +## 2.4 Durable state machine for multi-stage builds + +### Dependency guidance (best choices) +- `better-sqlite3` / `lmdb` — persist the build state machine (stage, shard progress, error ledger, tool versions, input manifest hashes) in a durable store. + - Prefer append-only event logs + periodic snapshots rather than in-place mutation only. +- `pino` — log state transitions as structured events (runId, shardId, stage, timings, error category). +- `prom-client` — expose state machine counters/histograms for throughput and failure rates (per stage, per language). 
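+A sketch wiring the three together (table and metric names are illustrative):
+
+```js
+import Database from 'better-sqlite3';
+import pino from 'pino';
+import client from 'prom-client';
+
+const log = pino();
+const db = new Database('build-state.db'); // path illustrative
+db.pragma('journal_mode = WAL');
+db.exec(`CREATE TABLE IF NOT EXISTS build_events (
+  ts INTEGER NOT NULL, run_id TEXT, shard_id TEXT, stage TEXT, event TEXT
+)`);
+
+const transitions = new client.Counter({
+  name: 'build_stage_transitions_total',
+  help: 'Build state machine transitions',
+  labelNames: ['stage', 'event']
+});
+const insertEvent = db.prepare('INSERT INTO build_events VALUES (?, ?, ?, ?, ?)');
+
+// Append-only event log + structured log line + metric, all from one call site.
+export function recordTransition({ runId, shardId, stage, event }) {
+  insertEvent.run(Date.now(), runId, shardId, stage, event);
+  transitions.inc({ stage, event });
+  log.info({ runId, shardId, stage, event }, 'build state transition');
+}
+```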
+ +- [x] Create a build state model with explicit phases: + - [x] discovery → preprocessing → stage1 → stage2 → stage3 → validation → promote +- [x] Ensure stage2/stage3 jobs cannot remain “running forever”: + - [x] heartbeat timestamps: persist `lastHeartbeatAt` every **30s** while a job is `running` + - [x] stale job detection: consider a job stale if `now - lastHeartbeatAt` exceeds: + - [x] **10 minutes** for stage2 (enrichment; mostly CPU + local IO) + - [x] **15 minutes** for stage3 (embeddings; can be longer-running, but heartbeat is independent of work duration) + - [x] recovery policy: mark stale jobs as `failed` and re-queue up to **2 retries** (default) with exponential backoff (**2m**, **10m**) + - [x] resumable checkpoints: persist progress at least every **1,000 files** or **120 seconds** (whichever comes first) + +**Deliverables** +- `docs/artifact-contract.md` +- `docs/metadata-schema-v2.md` +- hardened `index-validate` +- atomic build/promotion implementation + tests + +**Exit criteria** +- Killing the process mid-build never corrupts last-known-good index. +- Any index can be validated deterministically; schema v2 is published and enforced. + +--- + +# Phase 3 — Generalized Hybrid Chunking and Prose Extraction (Correctness) + +**Objective:** Make file segmentation and chunking correct for real-world mixed files (embedded languages) and ensure comments are consistently extracted and searchable as prose when desired. + +## 3.1 Introduce a **SegmentedDocument** pipeline + +### Dependency guidance (best choices) +- `file-type` + `istextorbinary` — aggressively avoid parsing binaries; detect via magic bytes first, then fallback heuristics. +- `chardet` + `iconv-lite` — only attempt encoding detection/decoding when UTF-8 decoding fails; preserve byte offsets by tracking decoding strategy. +- `fdir` — fast directory traversal (significantly faster than naive `fs.readdir` recursion). +- `ignore` — implement `.gitignore` semantics correctly (and cache per-directory ignore matchers). +- `picomatch` — precompile include/exclude globs for the segment discovery pre-pass (don’t recompile per file). +- `linguist-languages` — unify extension → languageId mapping, but keep project overrides (repo-local config) higher priority. + +- [x] Define a new internal representation: + - [x] `FileDocument { file, bytes, text, ext, langHint }` + - [x] `Segment { segmentId, type: code|prose|config|comment|embedded, languageId, start, end, parentSegmentId?, meta }` + - [x] `Chunk { chunkId, segmentId, start, end, name, kind, metaV2 }` +- [x] Replace “single chunker per file” with: + 1) segment discovery + 2) per-segment chunking + 3) chunk merging + stable ordering + overlap rules + +## 3.2 Mixed-file support coverage (beyond HTML) + +### Dependency guidance (best choices) +- Markdown / MDX / prose containers: + - `micromark` — extract **exact byte ranges** of headings, paragraphs, and fenced code blocks (language from info string). + - `yaml` + `smol-toml` + `jsonc-parser` — parse frontmatter blocks into config segments with node/range provenance. + - `@mdx-js/mdx` — for MDX, compile with plugins disabled by default; enable remark/rehype plugins only when requested (performance). +- Web component containers: + - `@vue/compiler-sfc` — use `parse()` to get descriptor blocks and their `loc`/range; treat template/script/style as segments and preserve ordering. 
+  - `svelte` — use compiler `parse()`; extract `<script>`/`<style>` blocks and template markup as segments, preserving exact ranges.
diff --git a/src/map/isometric/client/controls.js b/src/map/isometric/client/controls.js
new file mode 100644
index 000000000..4e785c50f
--- /dev/null
+++ b/src/map/isometric/client/controls.js
@@ -0,0 +1,322 @@
+import { state } from './state.js';
+import { clamp } from './utils.js';
+import { applyHighlights, setSelection, openSelection } from './selection.js';
+
+export const initControls = () => {
+  const {
+    THREE,
+    dom,
+    renderer,
+    camera,
+    lockIsometric,
+    getViewport,
+    groundPlane,
+    lineResolution,
+    controlDefaults,
+    controls,
+    flowWaveLayers,
+    flowWaveTotal,
+    visuals,
+    visualDefaults
+  } = state;
+
+  const pointer = new THREE.Vector2();
+  const raycaster = new THREE.Raycaster();
+  const zoomRaycaster = new THREE.Raycaster();
+
+  const getPointerNdc = (event) => {
+    const rect = renderer.domElement.getBoundingClientRect();
+    const x = ((event.clientX - rect.left) / rect.width) * 2 - 1;
+    const y = -((event.clientY - rect.top) / rect.height) * 2 + 1;
+    return { x, y, rect };
+  };
+
+  const getPlanePointFromNdc = (ndc) => {
+    if (!ndc) return null;
+    zoomRaycaster.setFromCamera({ x: ndc.x, y: ndc.y }, camera);
+    const point = new THREE.Vector3();
+    if (zoomRaycaster.ray.intersectPlane(groundPlane, point)) return point;
+    return null;
+  };
+
+  const onPointer = (event) => {
+    const ndc = getPointerNdc(event);
+    pointer.x = ndc.x;
+    pointer.y = ndc.y;
+    raycaster.setFromCamera(pointer, camera);
+    const hits = raycaster.intersectObjects([...state.memberMeshes, ...state.fileMeshes]);
+    const target = hits.length ? hits[0].object : null;
+    setSelection(target);
+  };
+
+  let dragging = false;
+  let dragMoved = false;
+  let lastPointer = { x: 0, y: 0 };
+
+  const startDrag = (event) => {
+    dragging = true;
+    dragMoved = false;
+    lastPointer = { x: event.clientX, y: event.clientY };
+  };
+
+  const moveDrag = (event) => {
+    if (!dragging) return;
+    const dx = event.clientX - lastPointer.x;
+    const dy = event.clientY - lastPointer.y;
+    if (Math.abs(dx) + Math.abs(dy) > 1) dragMoved = true;
+    lastPointer = { x: event.clientX, y: event.clientY };
+    const ndc = getPointerNdc(event);
+    const rect = ndc.rect;
+    if (!rect.width || !rect.height) return;
+    const viewWidth = (camera.right - camera.left) / camera.zoom;
+    const viewHeight = (camera.top - camera.bottom) / camera.zoom;
+    const unitsX = viewWidth / rect.width;
+    const unitsZ = viewHeight / rect.height;
+    const panSensitivity = controls.panSensitivity || controlDefaults.panSensitivity;
+    const rot = Math.PI / 4;
+    const cos = Math.cos(rot);
+    const sin = Math.sin(rot);
+    const dragForward = -dy;
+    const dragSide = dx;
+    const moveX = (dragForward * cos - dragSide * sin) * unitsX * panSensitivity;
+    const moveZ = (dragForward * sin + dragSide * cos) * unitsZ * panSensitivity;
+    camera.position.x += moveX;
+    camera.position.z += moveZ;
+    lockIsometric();
+  };
+
+  const updateHover = (event) => {
+    if (dragging) return;
+    const ndc = getPointerNdc(event);
+    pointer.x = ndc.x;
+    pointer.y = ndc.y;
+    raycaster.setFromCamera(pointer, camera);
+    const hits = raycaster.intersectObjects([...state.memberMeshes, ...state.fileMeshes]);
+    const nextHover = hits.length ?
hits[0].object : null; + if (nextHover !== state.hoveredMesh) { + state.hoveredMesh = nextHover; + applyHighlights(); + } + }; + + const endDrag = () => { + dragging = false; + }; + + renderer.domElement.addEventListener('pointerdown', startDrag); + window.addEventListener('pointermove', moveDrag); + window.addEventListener('pointerup', endDrag); + renderer.domElement.addEventListener('pointerleave', endDrag); + renderer.domElement.addEventListener('pointermove', updateHover); + renderer.domElement.addEventListener('pointerleave', () => { + state.hoveredMesh = null; + applyHighlights(); + }); + + renderer.domElement.addEventListener('click', (event) => { + if (dragMoved) { + dragMoved = false; + return; + } + onPointer(event); + }); + renderer.domElement.addEventListener('dblclick', (event) => { + if (dragMoved) { + dragMoved = false; + return; + } + onPointer(event); + openSelection(); + }); + + let focused = false; + dom.app.addEventListener('pointerdown', () => { + focused = true; + dom.app.focus(); + }); + window.addEventListener('blur', () => { focused = false; }); + + const keys = {}; + window.addEventListener('keydown', (event) => { + if (!focused) return; + keys[event.code] = true; + }); + window.addEventListener('keyup', (event) => { + if (!focused) return; + keys[event.code] = false; + }); + + const velocity = new THREE.Vector2(0, 0); + + const updateCamera = (dt) => { + const wasd = controls.wasd || controlDefaults.wasd; + const accel = wasd.acceleration || controlDefaults.wasd.acceleration; + const maxSpeed = wasd.maxSpeed || controlDefaults.wasd.maxSpeed; + const drag = wasd.drag || controlDefaults.wasd.drag; + const sensitivity = wasd.sensitivity || controlDefaults.wasd.sensitivity; + + if (keys.KeyW) velocity.y -= accel * dt; + if (keys.KeyS) velocity.y += accel * dt; + if (keys.KeyA) velocity.x += accel * dt; + if (keys.KeyD) velocity.x -= accel * dt; + + velocity.x -= velocity.x * drag * dt; + velocity.y -= velocity.y * drag * dt; + velocity.x = Math.max(-maxSpeed, Math.min(maxSpeed, velocity.x)); + velocity.y = Math.max(-maxSpeed, Math.min(maxSpeed, velocity.y)); + + const rot = Math.PI / 4; + const cos = Math.cos(rot); + const sin = Math.sin(rot); + const moveX = (velocity.y * cos - velocity.x * sin) * dt * sensitivity * 0.005; + const moveZ = (velocity.y * sin + velocity.x * cos) * dt * sensitivity * 0.005; + camera.position.x += moveX; + camera.position.z += moveZ; + lockIsometric(); + }; + + let zoomVelocity = 0; + let zoomPointer = { x: 0, y: 0 }; + const onWheel = (event) => { + event.preventDefault(); + const zoomSensitivity = Number.isFinite(controls.zoomSensitivity) + ? controls.zoomSensitivity + : controlDefaults.zoomSensitivity; + const rawDelta = Number.isFinite(event.deltaY) ? event.deltaY : 0; + const deltaModeScale = event.deltaMode === 1 ? 18 : (event.deltaMode === 2 ? 360 : 1); + const delta = -rawDelta * deltaModeScale * 0.05; + const ndc = getPointerNdc(event); + zoomPointer = { x: ndc.x, y: ndc.y }; + const direction = Math.sign(delta); + const velocityDir = Math.sign(zoomVelocity); + const momentumBoost = Math.min(6, Math.abs(zoomVelocity) * 0.6); + const repeatBoost = direction !== 0 && direction === velocityDir ? 
1 + momentumBoost : 1; + zoomVelocity += delta * zoomSensitivity * (2 + repeatBoost); + }; + renderer.domElement.addEventListener('wheel', onWheel, { passive: false }); + + let lastTime = performance.now(); + let lastPulseUpdate = 0; + const animate = () => { + requestAnimationFrame(animate); + const now = performance.now(); + const dt = Math.min(0.05, (now - lastTime) / 1000); + lastTime = now; + updateCamera(dt); + if (Math.abs(zoomVelocity) > 0.0001) { + const zoomMin = Number.isFinite(controls.zoomMin) + ? controls.zoomMin + : controlDefaults.zoomMin; + const zoomMax = Number.isFinite(controls.zoomMax) ? controls.zoomMax : controlDefaults.zoomMax; + const before = getPlanePointFromNdc(zoomPointer); + camera.zoom = Math.max(zoomMin, Math.min(zoomMax, camera.zoom + zoomVelocity * dt)); + camera.updateProjectionMatrix(); + const after = getPlanePointFromNdc(zoomPointer); + if (before && after) { + camera.position.add(before.sub(after)); + lockIsometric(); + } + const damping = Number.isFinite(controls.zoomDamping) ? controls.zoomDamping : controlDefaults.zoomDamping; + zoomVelocity *= Math.pow(damping, dt * 60); + if (Math.abs(zoomVelocity) < 0.0001) zoomVelocity = 0; + } + if (now - lastPulseUpdate > 33) { + lastPulseUpdate = now; + for (const material of state.glowMaterials) { + const base = material.userData?.glowBase ?? 0; + const range = material.userData?.glowRange ?? 0.05; + const glowSpeed = material.userData?.glowSpeed ?? 1; + const glowPhase = material.userData?.glowPhase ?? 0; + const pulse = 0.5 + 0.5 * Math.sin(now * 0.002 * glowSpeed + glowPhase); + material.emissiveIntensity = base + range * pulse; + } + const flowSpeed = visuals.glowPulseSpeed || visualDefaults.glowPulseSpeed; + for (const material of state.flowMaterials) { + const base = material.userData?.glowBase ?? 0; + const range = material.userData?.glowRange ?? 0.05; + const phase = material.userData?.flowPhase ?? 0; + const dir = material.userData?.flowDir ?? 1; + const typeSpeed = material.userData?.flowSpeed ?? 1; + const offset = material.userData?.flowOffset ?? 0; + let waveSum = 0; + for (const layer of flowWaveLayers) { + const waveTime = + now * 0.002 * flowSpeed * layer.speed * typeSpeed + offset - phase * dir; + waveSum += layer.amplitude * (0.5 + 0.5 * Math.sin(waveTime)); + } + const waveValue = waveSum / flowWaveTotal; + material.emissiveIntensity = base + range * waveValue; + } + for (const material of state.wireMaterials) { + const base = material.userData?.glowBase ?? 0.3; + const range = material.userData?.glowRange ?? 0.4; + const phase = material.userData?.flowPhase ?? 0; + const wireSpeed = + material.userData?.flowSpeed ?? + visuals.wirePulseSpeed ?? + visualDefaults.wirePulseSpeed; + const wirePulse = 0.5 + 0.5 * Math.sin(now * 0.002 * wireSpeed - phase); + material.opacity = clamp(base + range * wirePulse, 0.02, 0.6); + } + for (const material of state.gridLineMaterials) { + const base = material.userData?.glowBase ?? 0.1; + const range = material.userData?.glowRange ?? 0.2; + const phase = material.userData?.flowPhase ?? 0; + const gridSpeed = + material.userData?.flowSpeed ?? + visuals.gridPulseSpeed ?? + visualDefaults.gridPulseSpeed; + const gridPulse = 0.5 + 0.5 * Math.sin(now * 0.002 * gridSpeed + phase); + material.opacity = clamp(base + range * gridPulse, 0.02, 0.6); + } + for (const light of state.flowLights) { + const base = light.userData?.base ?? 0.8; + const phase = light.userData?.flowPhase ?? 0; + const dir = light.userData?.flowDir ?? 
1; + const typeSpeed = light.userData?.flowSpeed ?? 1; + const offset = light.userData?.flowOffset ?? 0; + let waveSum = 0; + for (const layer of flowWaveLayers) { + const waveTime = + now * 0.002 * flowSpeed * layer.speed * typeSpeed + offset - phase * dir; + waveSum += layer.amplitude * (0.5 + 0.5 * Math.sin(waveTime)); + } + const waveValue = waveSum / flowWaveTotal; + light.intensity = base * (0.4 + 0.6 * waveValue); + } + } + lockIsometric(); + renderer.render(state.scene, camera); + }; + animate(); + + const onResize = () => { + const viewport = getViewport(); + const aspect = viewport.width / viewport.height; + const base = state.cameraBase; + camera.left = -base * aspect; + camera.right = base * aspect; + camera.top = base; + camera.bottom = -base; + camera.near = state.nearPlane; + camera.far = state.farPlane; + camera.updateProjectionMatrix(); + lineResolution.width = viewport.width; + lineResolution.height = viewport.height; + for (const material of state.wireMaterials) { + if (material.resolution && typeof material.resolution.set === 'function') { + material.resolution.set(lineResolution.width, lineResolution.height); + } + } + for (const material of state.gridLineMaterials) { + if (material.resolution && typeof material.resolution.set === 'function') { + material.resolution.set(lineResolution.width, lineResolution.height); + } + } + renderer.setPixelRatio(Math.min(2, window.devicePixelRatio || 1)); + renderer.setSize(viewport.width, viewport.height); + lockIsometric(); + }; + window.addEventListener('resize', onResize); + onResize(); +}; diff --git a/src/map/isometric/client/defaults.js b/src/map/isometric/client/defaults.js new file mode 100644 index 000000000..cb657cf6f --- /dev/null +++ b/src/map/isometric/client/defaults.js @@ -0,0 +1,120 @@ +export const layoutDefaults = { + style: 'flow', + groupDepth: 1, + groupSpacing: 3.2, + fileSpacing: 2, + compactness: 1, + baseSize: 3.2, + fileHeight: 1.2, + fileShape: 'category', + memberShape: 'category', + memberCell: 0.9, + memberGap: 0.2, + memberInset: 0.35, + memberHeightBase: 0.8, + memberHeightScale: 0.55, + memberHeightMax: 7, + edgePlane: -1, + routingPadding: 0.9, + routingStep: 1.3, + labelScale: 0.018, + labelOffset: 0.08 +}; + +export const scoringDefaults = { + dataflow: 0.9, + controlFlow: 0.9, + params: 0.4, + signature: 0.03, + exported: 1.4, + modifiers: 0.4, + type: 1.2, + returns: 0.8 +}; + +export const colorDefaults = { + mode: 'score', + hueStart: 0.72, + hueEnd: 0.08, + saturation: 0.75, + lightnessMin: 0.42, + lightnessMax: 0.72, + distinctSaturation: 0.66, + distinctLightness: 0.58, + distinctHueOffset: 0.08 +}; + +export const assetDefaults = { + normalMapUrl: '/assets/isomap/normal.jpg', + hdrEnvUrl: '/assets/isomap/moonless_golf_2k.hdr', + rgbeLoaderUrl: '/three/examples/jsm/loaders/RGBELoader.js' +}; + +export const visualDefaults = { + fileOpacity: 1, + memberOpacity: 1, + flowGlowBase: 0.9, + flowGlowRange: 0.75, + glowPulseSpeed: 1.4, + wireframeThickness: 0.08, + wireframeGlow: 0.18, + wirePulseSpeed: 0.18, + gridLineThickness: 0.5, + gridGlowBase: 0.2, + gridGlowRange: 0.38, + gridPulseSpeed: 0.2, + enableFlowLights: true, + curveEdges: false, + enableFog: false, + enableHeightFog: false, + fogDistance: 2.8, + fogColor: '#0f1115', + fogHeight: 4, + fogHeightRange: 14, + enableExtraLights: true, + glass: { + metalness: 0.15, + roughness: 0.03, + transmission: 1, + ior: 1.6, + reflectivity: 1, + thickness: 3.6, + envMapIntensity: 5.2, + clearcoat: 1, + clearcoatRoughness: 0.03, + normalScale: 
0.22, + clearcoatNormalScale: 0.16, + normalRepeat: 2.8 + } +}; + +export const controlDefaults = { + panSensitivity: 1.5, + zoomSensitivity: 6, + zoomDamping: 0.9, + zoomMin: 1, + zoomMax: 80, + wasd: { + sensitivity: 40000, + acceleration: 16000, + maxSpeed: 120000, + drag: 6 + } +}; + +export const flowWaveLayers = [ + { speed: 0.9, amplitude: 0.6 }, + { speed: 1.6, amplitude: 0.35 }, + { speed: 2.4, amplitude: 0.25 }, + { speed: 3.4, amplitude: 0.18 } +]; + +export const flowTypeProfiles = { + dataflow: { speed: 1.2, phase: 0.0 }, + export: { speed: 1.5, phase: 1.4 }, + call: { speed: 1.8, phase: 2.1 }, + import: { speed: 1.0, phase: 2.8 }, + usage: { speed: 0.9, phase: 3.6 }, + alias: { speed: 1.3, phase: 4.3 }, + other: { speed: 1.0, phase: 0.8 } +}; diff --git a/src/map/isometric/client/dom.js b/src/map/isometric/client/dom.js new file mode 100644 index 000000000..250c80dd0 --- /dev/null +++ b/src/map/isometric/client/dom.js @@ -0,0 +1,44 @@ +export const storageKey = 'pairofcleats.isometric.config'; + +export const mergeConfig = (base, override) => { + if (!override || typeof override !== 'object') return base; + const merged = { ...base, ...override }; + merged.layout = { ...(base.layout || {}), ...(override.layout || {}) }; + merged.controls = { ...(base.controls || {}), ...(override.controls || {}) }; + merged.colors = { ...(base.colors || {}), ...(override.colors || {}) }; + merged.scoring = { ...(base.scoring || {}), ...(override.scoring || {}) }; + merged.visuals = { ...(base.visuals || {}), ...(override.visuals || {}) }; + merged.assets = { ...(base.assets || {}), ...(override.assets || {}) }; + return merged; +}; + +export const loadDomConfig = () => { + const map = JSON.parse(document.getElementById('map-data').textContent || '{}'); + let config = JSON.parse(document.getElementById('viewer-config').textContent || '{}'); + const dom = { + app: document.getElementById('app'), + selectionBody: document.getElementById('selection-body'), + summary: document.getElementById('summary'), + menuView: document.getElementById('menu-view'), + menuEdges: document.getElementById('menu-edges'), + menuControls: document.getElementById('menu-controls'), + menuLayout: document.getElementById('menu-layout'), + menuScore: document.getElementById('menu-score'), + menuColors: document.getElementById('menu-colors'), + menuColorMode: document.getElementById('menu-color-mode'), + menuVisuals: document.getElementById('menu-visuals'), + menuEffects: document.getElementById('menu-effects'), + menuActions: document.getElementById('menu-actions') + }; + + try { + const stored = window.localStorage.getItem(storageKey); + if (stored) { + config = mergeConfig(config, JSON.parse(stored)); + } + } catch (err) { + // ignore storage failures + } + + return { map, config, dom }; +}; diff --git a/src/map/isometric/client/edges.js b/src/map/isometric/client/edges.js new file mode 100644 index 000000000..b2ffa8558 --- /dev/null +++ b/src/map/isometric/client/edges.js @@ -0,0 +1,579 @@ +import { state } from './state.js'; +import { applyHeightFog, updateFlowLights } from './materials.js'; + +const quantize = (value) => Number(value.toFixed(3)); + +export const buildEdges = () => { + const { + THREE, + edges, + allFiles, + layoutMetrics, + edgeWeights, + edgeGroup, + edgeVisibility, + flowTypeProfiles, + fileAnchors, + memberAnchors, + fileByMember, + memberColorById, + fileColorByPath, + visuals, + layoutStyle + } = state; + + const edgePlane = layoutMetrics.edgePlane; + const routingPadding = 
layoutMetrics.routingPadding; + const routingStep = layoutMetrics.routingStep; + + const resolveEdgeFile = (endpoint) => { + if (!endpoint) return null; + if (endpoint.file) return endpoint.file; + if (endpoint.member) return fileByMember.get(endpoint.member) || null; + return null; + }; + + const resolveEdgeColor = (endpoint) => { + if (!endpoint) return null; + if (endpoint.member && memberColorById.has(endpoint.member)) { + return memberColorById.get(endpoint.member); + } + if (endpoint.file && fileColorByPath.has(endpoint.file)) { + return fileColorByPath.get(endpoint.file); + } + const fileKey = resolveEdgeFile(endpoint); + if (fileKey && fileColorByPath.has(fileKey)) { + return fileColorByPath.get(fileKey); + } + return null; + }; + + const obstacles = []; + let minX = Infinity; + let maxX = -Infinity; + let minZ = Infinity; + let maxZ = -Infinity; + for (const fileLayout of allFiles) { + const fileId = fileLayout.node.path || fileLayout.node.name || null; + if (!fileId) continue; + const bounds = { + file: fileId, + minX: fileLayout.x - fileLayout.width / 2 - routingPadding, + maxX: fileLayout.x + fileLayout.width / 2 + routingPadding, + minZ: fileLayout.z - fileLayout.depth / 2 - routingPadding, + maxZ: fileLayout.z + fileLayout.depth / 2 + routingPadding + }; + obstacles.push(bounds); + minX = Math.min(minX, bounds.minX); + maxX = Math.max(maxX, bounds.maxX); + minZ = Math.min(minZ, bounds.minZ); + maxZ = Math.max(maxZ, bounds.maxZ); + } + + const resolveAnchor = (endpoint) => { + if (!endpoint) return null; + if (endpoint.member && memberAnchors.has(endpoint.member)) return memberAnchors.get(endpoint.member); + if (endpoint.file && fileAnchors.has(endpoint.file)) return fileAnchors.get(endpoint.file); + return null; + }; + + const segmentHitsObstacle = (x1, z1, x2, z2, ignoreFiles) => { + const dx = x2 - x1; + const dz = z2 - z1; + for (const obstacle of obstacles) { + if (ignoreFiles && ignoreFiles.has(obstacle.file)) continue; + const minX = obstacle.minX; + const maxX = obstacle.maxX; + const minZ = obstacle.minZ; + const maxZ = obstacle.maxZ; + const insideStart = x1 >= minX && x1 <= maxX && z1 >= minZ && z1 <= maxZ; + const insideEnd = x2 >= minX && x2 <= maxX && z2 >= minZ && z2 <= maxZ; + if (insideStart || insideEnd) return true; + let t0 = 0; + let t1 = 1; + const clip = (p, q) => { + if (p === 0) return q >= 0; + const r = q / p; + if (p < 0) { + if (r > t1) return false; + if (r > t0) t0 = r; + } else { + if (r < t0) return false; + if (r < t1) t1 = r; + } + return true; + }; + if ( + clip(-dx, x1 - minX) + && clip(dx, maxX - x1) + && clip(-dz, z1 - minZ) + && clip(dz, maxZ - z1) + ) { + return true; + } + } + return false; + }; + + const buildLaneValues = (min, max, step) => { + const values = []; + if (!step || step <= 0) return values; + const start = Math.floor(min / step) * step; + const end = Math.ceil(max / step) * step; + for (let value = start; value <= end; value += step) { + values.push(Number(value.toFixed(3))); + } + return values; + }; + + const sqrt3 = Math.sqrt(3); + const toAxial = (point, size) => { + const q = (sqrt3 / 3 * point.x - 1 / 3 * point.z) / size; + const r = (2 / 3 * point.z) / size; + return { q, r }; + }; + const axialToPoint = (axial, size) => ({ + x: size * sqrt3 * (axial.q + axial.r / 2), + z: size * 1.5 * axial.r + }); + const cubeRound = (cube) => { + let rx = Math.round(cube.x); + let ry = Math.round(cube.y); + let rz = Math.round(cube.z); + const dx = Math.abs(rx - cube.x); + const dy = Math.abs(ry - cube.y); + const dz = 
Math.abs(rz - cube.z); + if (dx > dy && dx > dz) { + rx = -ry - rz; + } else if (dy > dz) { + ry = -rx - rz; + } else { + rz = -rx - ry; + } + return { x: rx, y: ry, z: rz }; + }; + const axialToCube = (axial) => ({ x: axial.q, z: axial.r, y: -axial.q - axial.r }); + const cubeToAxial = (cube) => ({ q: cube.x, r: cube.z }); + const cubeLerp = (a, b, t) => ({ + x: a.x + (b.x - a.x) * t, + y: a.y + (b.y - a.y) * t, + z: a.z + (b.z - a.z) * t + }); + const cubeDistance = (a, b) => Math.max(Math.abs(a.x - b.x), Math.abs(a.y - b.y), Math.abs(a.z - b.z)); + const buildHexPath = (start, end, size) => { + if (!size || size <= 0) return [start, end]; + const a = axialToCube(toAxial(start, size)); + const b = axialToCube(toAxial(end, size)); + const steps = Math.max(1, cubeDistance(a, b)); + const points = []; + for (let i = 0; i <= steps; i += 1) { + const t = steps === 0 ? 0 : i / steps; + const cube = cubeRound(cubeLerp(a, b, t)); + points.push(axialToPoint(cubeToAxial(cube), size)); + } + return points; + }; + + const useHexRouting = layoutStyle === 'hex'; + const hexSize = Math.max(routingStep, (layoutMetrics.baseSize || 1) * 0.6); + + const findRoute = (start, end, ignoreFiles) => { + let bestPoints = null; + let bestDistance = Infinity; + const tryPath = (points) => { + for (let i = 0; i < points.length - 1; i += 1) { + const a = points[i]; + const b = points[i + 1]; + if (segmentHitsObstacle(a.x, a.z, b.x, b.z, ignoreFiles)) return false; + } + let distance = 0; + for (let i = 0; i < points.length - 1; i += 1) { + const a = points[i]; + const b = points[i + 1]; + distance += Math.abs(a.x - b.x) + Math.abs(a.z - b.z); + } + if (distance < bestDistance) { + bestDistance = distance; + bestPoints = points; + } + return true; + }; + + if (useHexRouting) { + const hexPoints = buildHexPath(start, end, hexSize); + let hits = false; + for (let i = 0; i < hexPoints.length - 1; i += 1) { + if (segmentHitsObstacle(hexPoints[i].x, hexPoints[i].z, hexPoints[i + 1].x, hexPoints[i + 1].z, ignoreFiles)) { + hits = true; + break; + } + } + if (!hits) return hexPoints; + } + + const directA = [start, { x: end.x, z: start.z }, end]; + const directB = [start, { x: start.x, z: end.z }, end]; + const directAOk = tryPath(directA); + const directBOk = tryPath(directB); + if (directAOk || directBOk) { + return bestPoints || directA; + } + + const laneZ = buildLaneValues(minZ - routingPadding, maxZ + routingPadding, routingStep); + for (const z of laneZ) { + tryPath([start, { x: start.x, z }, { x: end.x, z }, end]); + } + const laneX = buildLaneValues(minX - routingPadding, maxX + routingPadding, routingStep); + for (const x of laneX) { + tryPath([start, { x, z: start.z }, { x, z: end.z }, end]); + } + + return bestPoints || directA; + }; + + const flowSegmentsByType = new Map(); + const flowLightCandidates = []; + const edgeStyles = state.map.legend?.edgeStyles || {}; + const edgeTypeAliases = state.map.legend?.edgeTypes || {}; + const resolveEdgeType = (type) => (edgeStyles[type] ? 
type : (edgeTypeAliases[type] || type));
+  const resolveEdgeStyle = (type) => edgeStyles[resolveEdgeType(type)] || edgeStyles[type] || {};
+  const addEndpoint = (entry, endpoint) => {
+    if (!endpoint) return;
+    if (endpoint.member) {
+      entry.endpoints.add(`member:${endpoint.member}`);
+      const memberFile = fileByMember.get(endpoint.member);
+      if (memberFile) entry.endpoints.add(`file:${memberFile}`);
+    }
+    if (endpoint.file) {
+      entry.endpoints.add(`file:${endpoint.file}`);
+    }
+  };
+
+  // Accumulate overlapping polyline pieces into one record per unique segment;
+  // endpoints are ordered canonically so A->B and B->A merge into the same key.
+  const addFlowSegment = (type, x1, y1, z1, x2, y2, z2, weight, color, dir, edge) => {
+    if (Math.abs(x1 - x2) < 0.0001 && Math.abs(y1 - y2) < 0.0001 && Math.abs(z1 - z2) < 0.0001) return;
+    const nx1 = quantize(x1);
+    const ny1 = quantize(y1);
+    const nz1 = quantize(z1);
+    const nx2 = quantize(x2);
+    const ny2 = quantize(y2);
+    const nz2 = quantize(z2);
+    const swap = nx1 > nx2 || (nx1 === nx2 && (ny1 > ny2 || (ny1 === ny2 && nz1 > nz2)));
+    const ax1 = swap ? nx2 : nx1;
+    const ay1 = swap ? ny2 : ny1;
+    const az1 = swap ? nz2 : nz1;
+    const ax2 = swap ? nx1 : nx2;
+    const ay2 = swap ? ny1 : ny2;
+    const az2 = swap ? nz1 : nz2;
+    const key = `${ax1},${ay1},${az1}->${ax2},${ay2},${az2}`;
+    const bucket = flowSegmentsByType.get(type) || new Map();
+    const entry = bucket.get(key) || {
+      x1: ax1,
+      y1: ay1,
+      z1: az1,
+      x2: ax2,
+      y2: ay2,
+      z2: az2,
+      weight: 0,
+      dirSum: 0,
+      rSum: 0,
+      gSum: 0,
+      bSum: 0,
+      colorWeight: 0,
+      endpoints: new Set()
+    };
+    const direction = Number.isFinite(dir) && dir !== 0 ? dir : 1;
+    const normalizedDir = swap ? -direction : direction;
+    entry.weight += weight;
+    entry.dirSum += normalizedDir * weight;
+    if (edge) {
+      addEndpoint(entry, edge.from);
+      addEndpoint(entry, edge.to);
+    }
+    if (color) {
+      entry.rSum += color.r * weight;
+      entry.gSum += color.g * weight;
+      entry.bSum += color.b * weight;
+      entry.colorWeight += weight;
+    }
+    bucket.set(key, entry);
+    flowSegmentsByType.set(type, bucket);
+  };
+
+  const edgeHighlight = new THREE.Color('#ffffff');
+  const endpointDots = new Map();
+  const planeY = edgePlane + Math.max(0.08, (layoutMetrics.memberGap || 0) * 0.3);
+  const curveEdges = visuals.curveEdges === true;
+  const addEndpointDot = (key, anchor, color) => {
+    if (!key || !anchor) return;
+    const entry = endpointDots.get(key) || {
+      x: anchor.x,
+      y: anchor.y,
+      z: anchor.z,
+      color: new THREE.Color(0, 0, 0),
+      weight: 0
+    };
+    if (color) {
+      entry.color.add(color);
+      entry.weight += 1;
+    }
+    endpointDots.set(key, entry);
+  };
+  const addPathPoints = (points, startAnchor, endAnchor, routePoints) => {
+    const startPlane = { x: startAnchor.x, y: planeY, z: startAnchor.z };
+    const endPlane = { x: endAnchor.x, y: planeY, z: endAnchor.z };
+    if (curveEdges) {
+      const startLift = Math.max(0.4, Math.abs(startAnchor.y - planeY) * 0.5);
+      points.push(startAnchor);
+      points.push({ x: startAnchor.x, y: Math.max(startAnchor.y, planeY) + startLift, z: startAnchor.z });
+      points.push(startPlane);
+    } else {
+      points.push(startAnchor);
+      points.push(startPlane);
+    }
+    routePoints.forEach((point, index) => {
+      if (index === 0 || index === routePoints.length - 1) return;
+      points.push({ x: point.x, y: planeY, z: point.z });
+    });
+    if (curveEdges) {
+      const endLift = Math.max(0.4, Math.abs(endAnchor.y - planeY) * 0.5);
+      points.push(endPlane);
+      points.push({ x: endAnchor.x, y: Math.max(endAnchor.y, planeY) + endLift, z: endAnchor.z });
+      points.push(endAnchor);
+    } else {
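+      // Without curves, edges simply drop straight from the anchor to the
+      // shared routing plane and travel flat until the far endpoint.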
+      points.push(endPlane);
+      points.push(endAnchor);
+    }
+  };
+
+  for (const edge of edges) {
+    const startAnchor = resolveAnchor(edge.from);
+    const endAnchor = resolveAnchor(edge.to);
+    if (!startAnchor || !endAnchor) continue;
+    const fromFile = resolveEdgeFile(edge.from);
+    const toFile = resolveEdgeFile(edge.to);
+    const ignoreFiles = new Set([fromFile, toFile].filter(Boolean));
+    const start = { x: startAnchor.x, z: startAnchor.z };
+    const end = { x: endAnchor.x, z: endAnchor.z };
+    const routePoints = findRoute(start, end, ignoreFiles);
+    const rawType = edge.type || 'other';
+    const type = resolveEdgeType(rawType);
+    const style = resolveEdgeStyle(type);
+    const weight = edgeWeights[type] || edgeWeights[rawType] || 1;
+    const fromColor = resolveEdgeColor(edge.from);
+    const toColor = resolveEdgeColor(edge.to);
+    let edgeColor = null;
+    if (fromColor && toColor) {
+      edgeColor = fromColor.clone().lerp(toColor, 0.5);
+    } else {
+      edgeColor = fromColor || toColor || new THREE.Color(style.color || '#9aa0a6');
+    }
+    const pathPoints = [];
+    addPathPoints(pathPoints, startAnchor, endAnchor, routePoints);
+    const path = curveEdges
+      ? new THREE.CatmullRomCurve3(pathPoints.map((p) => new THREE.Vector3(p.x, p.y, p.z)), false, 'centripetal', 0.4)
+      : null;
+    const resolvedPoints = path
+      ? path.getPoints(Math.min(40, Math.max(12, pathPoints.length * 3)))
+      : pathPoints.map((p) => new THREE.Vector3(p.x, p.y, p.z));
+    for (let i = 0; i < resolvedPoints.length - 1; i += 1) {
+      const a = resolvedPoints[i];
+      const b = resolvedPoints[i + 1];
+      const dx = b.x - a.x;
+      const dy = b.y - a.y;
+      const dz = b.z - a.z;
+      const dominant = Math.max(Math.abs(dx), Math.abs(dy), Math.abs(dz));
+      const dir = dominant === Math.abs(dx)
+        ? Math.sign(dx)
+        : (dominant === Math.abs(dy) ? Math.sign(dy) : Math.sign(dz));
+      addFlowSegment(type, a.x, a.y, a.z, b.x, b.y, b.z, weight, edgeColor, dir, edge);
+    }
+    if (edgeColor) {
+      if (edge.from?.member) addEndpointDot(`member:${edge.from.member}`, startAnchor, edgeColor);
+      if (edge.from?.file) addEndpointDot(`file:${edge.from.file}`, startAnchor, edgeColor);
+      if (edge.to?.member) addEndpointDot(`member:${edge.to.member}`, endAnchor, edgeColor);
+      if (edge.to?.file) addEndpointDot(`file:${edge.to.file}`, endAnchor, edgeColor);
+    }
+  }
+
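+  // Each edge type becomes one InstancedMesh: a shared unit box, stretched per
+  // segment along its direction and tinted through per-instance colors, keeps
+  // draw calls low even with thousands of merged segments.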
+  const localEdgeTypeGroups = new Map();
+  for (const [type, segments] of flowSegmentsByType.entries()) {
+    if (!segments.size) continue;
+    const group = new THREE.Group();
+    edgeGroup.add(group);
+    localEdgeTypeGroups.set(type, group);
+    if (edgeVisibility.has(type)) {
+      group.visible = edgeVisibility.get(type);
+    }
+    const style = resolveEdgeStyle(type);
+    const typeProfile = flowTypeProfiles[type] || flowTypeProfiles.other;
+    const fallbackColor = new THREE.Color(style.color || '#9aa0a6');
+    const entries = Array.from(segments.values());
+    if (!entries.length) continue;
+    const geometry = state.edgeUnitBoxGeometry || (state.edgeUnitBoxGeometry = (() => {
+      const unit = new THREE.BoxGeometry(1, 1, 1);
+      unit.userData = { ...(unit.userData || {}), shared: true };
+      return unit;
+    })());
+    const material = new THREE.MeshStandardMaterial({
+      color: 0xffffff,
+      roughness: 0.2,
+      metalness: 0.8,
+      envMapIntensity: visuals.glass.envMapIntensity,
+      transparent: true,
+      opacity: 0.85,
+      depthWrite: false,
+      depthTest: true,
+      vertexColors: true
+    });
+    if ('toneMapped' in material) material.toneMapped = false;
+    material.emissive = new THREE.Color(0xffffff);
+    material.emissiveIntensity = visuals.flowGlowBase;
+    material.userData = {
+      glowBase: visuals.flowGlowBase,
+      glowRange: visuals.flowGlowRange,
+      baseEmissiveIntensity: visuals.flowGlowBase,
+      baseOpacity: 0.85
+    };
+    const prevCompile = material.onBeforeCompile;
+    material.onBeforeCompile = (shader) => {
+      if (typeof prevCompile === 'function') prevCompile(shader);
+      // Multiply the emissive term by the per-instance color so glow matches tint.
+      if (shader.fragmentShader.includes('vColor')) {
+        shader.fragmentShader = shader.fragmentShader.replace(
+          '#include <emissivemap_fragment>',
+          '#include <emissivemap_fragment>\n totalEmissiveRadiance *= vColor;'
+        );
+      }
+    };
+    applyHeightFog(material);
+    state.flowMaterials.push(material);
+
+    const mesh = new THREE.InstancedMesh(geometry, material, entries.length);
+    mesh.renderOrder = 7;
+    const dummy = new THREE.Object3D();
+    const axis = new THREE.Vector3(1, 0, 0);
+    const direction = new THREE.Vector3();
+    const baseColors = new Array(entries.length);
+    const highlightColors = new Array(entries.length);
+    entries.forEach((entry, index) => {
+      const dx = entry.x2 - entry.x1;
+      const dy = entry.y2 - entry.y1;
+      const dz = entry.z2 - entry.z1;
+      const length = Math.sqrt(dx * dx + dy * dy + dz * dz);
+      if (!length) return;
+      const thickness = 0.08 + Math.log1p(entry.weight) * 0.04;
+      const colorWeight = entry.colorWeight || 0;
+      const averaged = colorWeight
+        ? new THREE.Color(entry.rSum / colorWeight, entry.gSum / colorWeight, entry.bSum / colorWeight)
+        : fallbackColor.clone();
+      const edgeBase = style.color ? new THREE.Color(style.color) : averaged;
+      const brightColor = edgeBase.clone().lerp(edgeHighlight, 0.65);
+      const highlightColor = brightColor.clone().lerp(edgeHighlight, 0.35);
+      const flowDirection = entry.dirSum >= 0 ?
1 : -1; + dummy.position.set((entry.x1 + entry.x2) / 2, (entry.y1 + entry.y2) / 2, (entry.z1 + entry.z2) / 2); + direction.set(dx, dy, dz).normalize(); + dummy.quaternion.setFromUnitVectors(axis, direction); + dummy.scale.set(length, thickness, thickness); + dummy.updateMatrix(); + mesh.setMatrixAt(index, dummy.matrix); + mesh.setColorAt(index, brightColor); + baseColors[index] = brightColor; + highlightColors[index] = highlightColor; + state.edgeSegments.push({ + mesh, + index, + endpoints: entry.endpoints, + edgeColor: brightColor, + highlightColor + }); + flowLightCandidates.push({ + x: dummy.position.x, + y: dummy.position.y, + z: dummy.position.z, + color: brightColor, + weight: entry.weight, + phase: (entry.x1 + entry.x2 + entry.z1 + entry.z2) * 0.18, + speed: typeProfile.speed || 1, + offset: typeProfile.phase || 0, + dir: flowDirection + }); + }); + mesh.instanceMatrix.needsUpdate = true; + if (mesh.instanceColor) mesh.instanceColor.needsUpdate = true; + mesh.userData = { + instanceBaseColors: baseColors, + instanceHighlightColors: highlightColors + }; + group.add(mesh); + state.edgeMeshes.push(mesh); + } + + if (flowLightCandidates.length) { + flowLightCandidates.sort((a, b) => b.weight - a.weight); + const maxLights = Math.min(32, flowLightCandidates.length); + for (let i = 0; i < maxLights; i += 1) { + const entry = flowLightCandidates[i]; + const light = new THREE.PointLight(entry.color, 2.2, 40, 2); + light.position.set(entry.x, (entry.y ?? edgePlane) + 0.6, entry.z); + light.userData = { + flowPhase: entry.phase, + base: 1.6, + flowSpeed: entry.speed || 1, + flowOffset: entry.offset || 0, + flowDir: entry.dir || 1 + }; + state.flowLights.push(light); + state.scene.add(light); + } + } + + if (endpointDots.size) { + const dotGeometry = state.edgeDotGeometry || (state.edgeDotGeometry = (() => { + const geom = new THREE.SphereGeometry(0.08, 10, 10); + geom.userData = { ...(geom.userData || {}), shared: true }; + return geom; + })()); + const dotMaterial = new THREE.MeshStandardMaterial({ + color: 0xffffff, + emissive: new THREE.Color(0xffffff), + emissiveIntensity: visuals.flowGlowBase, + metalness: 0.7, + roughness: 0.25, + envMapIntensity: visuals.glass.envMapIntensity, + transparent: true, + opacity: 0.95, + depthWrite: false, + depthTest: true, + vertexColors: true + }); + dotMaterial.userData = { + glowBase: visuals.flowGlowBase, + glowRange: visuals.flowGlowRange, + glowSpeed: 1.1, + glowPhase: 0.4 + }; + applyHeightFog(dotMaterial); + state.edgeDotMaterial = dotMaterial; + state.glowMaterials.push(dotMaterial); + const dotMesh = new THREE.InstancedMesh(dotGeometry, dotMaterial, endpointDots.size); + const dummy = new THREE.Object3D(); + let index = 0; + endpointDots.forEach((entry) => { + const color = entry.weight ? 
entry.color.multiplyScalar(1 / entry.weight) : new THREE.Color(0xffffff); + dummy.position.set(entry.x, entry.y, entry.z); + dummy.updateMatrix(); + dotMesh.setMatrixAt(index, dummy.matrix); + dotMesh.setColorAt(index, color); + index += 1; + }); + dotMesh.instanceMatrix.needsUpdate = true; + if (dotMesh.instanceColor) dotMesh.instanceColor.needsUpdate = true; + dotMesh.renderOrder = 8; + edgeGroup.add(dotMesh); + state.edgeDotMesh = dotMesh; + } + updateFlowLights(); + + state.edgeTypeGroups = localEdgeTypeGroups; + state.edgeTypes = Array.from(flowSegmentsByType.keys()).sort((a, b) => a.localeCompare(b)); +}; diff --git a/src/map/isometric/client/layout-utils.js b/src/map/isometric/client/layout-utils.js new file mode 100644 index 000000000..3e2b5be66 --- /dev/null +++ b/src/map/isometric/client/layout-utils.js @@ -0,0 +1,371 @@ +import { clamp, hashString } from './utils.js'; + +const shapeForCategory = { + source: 'hexagon', + test: 'pentagon-pyramid', + config: 'octagon', + docs: 'heptagon', + generated: 'square', + dir: 'pentagon', + other: 'square' +}; + +const shapeForMemberType = { + class: 'pyramid', + function: 'hexagon-pyramid', + symbol: 'square' +}; + +const knownShapes = new Set([ + 'square', + 'circle', + 'pyramid', + 'pentagon', + 'hexagon', + 'heptagon', + 'octagon', + 'pentagon-pyramid', + 'hexagon-pyramid', + 'heptagon-pyramid', + 'octagon-pyramid', + 'pentagon-frustum', + 'hexagon-frustum', + 'heptagon-frustum', + 'octagon-frustum' +]); + +export const resolveShape = (mode, { key, category, type } = {}) => { + const normalized = String(mode || 'square').toLowerCase(); + if (normalized === 'category') { + if (category && shapeForCategory[category]) return shapeForCategory[category]; + if (type && shapeForMemberType[type]) return shapeForMemberType[type]; + return 'square'; + } + if (normalized === 'mix') { + const mixSeed = hashString(key || category || type || ''); + if (mixSeed < 0.2) return 'square'; + if (mixSeed < 0.4) return 'circle'; + if (mixSeed < 0.6) return 'pyramid'; + if (mixSeed < 0.75) return 'hexagon'; + if (mixSeed < 0.9) return 'pentagon'; + return 'octagon'; + } + if (knownShapes.has(normalized)) { + return normalized; + } + return 'square'; +}; + +export const sizeFactor = (value, base, scale, min, max) => { + const normalized = base + Math.log1p(Math.max(0, value)) * scale; + return clamp(normalized, min, max); +}; + +export const memberSizeFromRange = (range) => { + if (!range || !Number.isFinite(range.startLine)) return 1; + const start = range.startLine; + const end = Number.isFinite(range.endLine) ? 
range.endLine : start; + return Math.max(1, end - start + 1); +}; + +const splitPath = (value) => String(value || '').split('/').filter(Boolean); + +export const groupKeyForPath = (filePath, groupDepth) => { + const segments = splitPath(filePath); + if (!segments.length || groupDepth === 0) return '(root)'; + return segments.slice(0, groupDepth).join('/'); +}; + +export const scoreMember = (member, scoring) => { + let score = 0; + const dataflow = member?.dataflow || {}; + const flowLists = [dataflow.reads, dataflow.writes, dataflow.mutations, dataflow.aliases]; + for (const list of flowLists) { + if (Array.isArray(list)) score += list.length * scoring.dataflow; + } + const control = member?.controlFlow || {}; + for (const value of Object.values(control)) { + if (Array.isArray(value)) score += value.length * scoring.controlFlow; + else if (typeof value === 'number') score += value * scoring.controlFlow; + else if (value) score += 1 * scoring.controlFlow; + } + if (Array.isArray(member?.params)) score += member.params.length * scoring.params; + if (member?.signature) score += Math.min(10, String(member.signature).length / 20) * scoring.signature; + if (member?.returns) score += 1 * scoring.returns; + if (member?.exported) score += 1 * scoring.exported; + if (member?.modifiers && typeof member.modifiers === 'object') { + score += Object.keys(member.modifiers).length * scoring.modifiers; + } + const kind = String(member?.kind || member?.type || '').toLowerCase(); + if (kind.includes('class') || kind.includes('interface') || kind.includes('struct')) score += scoring.type; + return score; +}; + +export const scoreToColor = (score, maxScore, colors, THREE, key) => { + const mode = String(colors.mode || 'score').toLowerCase(); + const color = new THREE.Color(); + if (mode === 'distinct') { + const seed = hashString(key || score || ''); + const normalized = seed / 0xffffffff; + const hue = (normalized + (colors.distinctHueOffset || 0)) % 1; + const saturation = colors.distinctSaturation ?? colors.saturation ?? 0.7; + const lightness = colors.distinctLightness ?? colors.lightnessMax ?? 0.6; + color.setHSL(hue, saturation, lightness); + return color; + } + const ratio = maxScore > 0 + ? Math.log10(score + 1) / Math.log10(maxScore + 1) + : 0; + const hue = colors.hueStart + (colors.hueEnd - colors.hueStart) * ratio; + const lightness = colors.lightnessMin + (colors.lightnessMax - colors.lightnessMin) * ratio; + color.setHSL(hue, colors.saturation, lightness); + return color; +}; + +export const computeGrid = (count) => { + if (!count) return { columns: 0, rows: 0 }; + const columns = Math.ceil(Math.sqrt(count)); + const rows = Math.ceil(count / columns); + return { columns, rows }; +}; + +export const buildSlots = (width, depth, columns, rows, cellSize, gap, memberInset, memberCell, memberGap) => { + if (!columns || !rows) return []; + const slots = []; + const resolvedCell = cellSize || memberCell; + const resolvedGap = Number.isFinite(gap) ? 
gap : memberGap; + const startX = -width / 2 + memberInset + resolvedCell / 2; + const startZ = -depth / 2 + memberInset + resolvedCell / 2; + for (let row = 0; row < rows; row += 1) { + for (let col = 0; col < columns; col += 1) { + const x = startX + col * (resolvedCell + resolvedGap); + const z = startZ + row * (resolvedCell + resolvedGap); + slots.push({ x, z, sort: x + z }); + } + } + return slots.sort((a, b) => (a.sort - b.sort) || (a.x - b.x) || (a.z - b.z)); +}; + +export const orderByAdjacency = (items, getKey, adjacency) => { + if (!items.length) return []; + if (items.length === 1) return items.slice(); + const keys = items.map(getKey); + const totalWeight = new Map(); + keys.forEach((key) => { + const neighbors = adjacency.get(key) || new Map(); + let total = 0; + for (const value of neighbors.values()) total += value; + totalWeight.set(key, total); + }); + const remaining = new Set(keys); + const orderedKeys = []; + let current = keys.slice().sort((a, b) => { + const diff = (totalWeight.get(b) || 0) - (totalWeight.get(a) || 0); + return diff || a.localeCompare(b); + })[0]; + orderedKeys.push(current); + remaining.delete(current); + while (remaining.size) { + let best = null; + let bestScore = -1; + for (const key of remaining) { + const neighbors = adjacency.get(key) || new Map(); + let score = 0; + for (const placed of orderedKeys) { + score += neighbors.get(placed) || 0; + } + score += (totalWeight.get(key) || 0) * 0.1; + if (score > bestScore) { + bestScore = score; + best = key; + } else if (score === bestScore && best && key.localeCompare(best) < 0) { + best = key; + } + } + orderedKeys.push(best); + remaining.delete(best); + } + const itemByKey = new Map(items.map((item) => [getKey(item), item])); + return orderedKeys.map((key) => itemByKey.get(key)).filter(Boolean); +}; + +export const layoutGridItems = (items, columns, spacing) => { + const count = items.length; + if (!count) return { width: 0, depth: 0, columns: 0, rows: 0 }; + const cols = Math.max(1, columns || 1); + const rows = Math.max(1, Math.ceil(count / cols)); + const colWidths = Array.from({ length: cols }, () => 0); + const rowDepths = Array.from({ length: rows }, () => 0); + items.forEach((item, index) => { + const col = index % cols; + const row = Math.floor(index / cols); + colWidths[col] = Math.max(colWidths[col], item.width || 0); + rowDepths[row] = Math.max(rowDepths[row], item.depth || 0); + }); + const colOffsets = []; + const rowOffsets = []; + let offsetX = 0; + for (let col = 0; col < cols; col += 1) { + colOffsets[col] = offsetX; + offsetX += colWidths[col] + spacing; + } + let offsetZ = 0; + for (let row = 0; row < rows; row += 1) { + rowOffsets[row] = offsetZ; + offsetZ += rowDepths[row] + spacing; + } + items.forEach((item, index) => { + const col = index % cols; + const row = Math.floor(index / cols); + const xPad = (colWidths[col] - item.width) / 2; + const zPad = (rowDepths[row] - item.depth) / 2; + item.x = colOffsets[col] + xPad; + item.z = rowOffsets[row] + zPad; + }); + const totalWidth = colWidths.reduce((acc, value) => acc + value, 0) + spacing * (cols - 1); + const totalDepth = rowDepths.reduce((acc, value) => acc + value, 0) + spacing * (rows - 1); + return { width: totalWidth, depth: totalDepth, columns: cols, rows }; +}; + +export const layoutRadialItems = (items, spacing) => { + const count = items.length; + if (!count) return { width: 0, depth: 0 }; + if (count === 1) { + items[0].x = 0; + items[0].z = 0; + return { width: items[0].width || 0, depth: items[0].depth || 0 }; + } 
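+  // Ring radius: treat the summed item diameters (plus spacing) as the needed
+  // circumference, and never shrink below 1.5x the largest item radius.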
+  const radii = items.map((item) => Math.max(item.width || 0, item.depth || 0) / 2);
+  const maxRadius = radii.reduce((acc, value) => Math.max(acc, value), 0);
+  const circumference = radii.reduce((acc, value) => acc + (value * 2 + spacing), 0);
+  const baseRadius = Math.max(maxRadius * 1.5, circumference / (2 * Math.PI));
+  let angle = 0;
+  items.forEach((item, index) => {
+    const arc = (radii[index] * 2 + spacing) / baseRadius;
+    angle += arc / 2;
+    item.x = Math.cos(angle) * baseRadius;
+    item.z = Math.sin(angle) * baseRadius;
+    angle += arc / 2;
+  });
+  const extent = baseRadius + maxRadius;
+  return { width: extent * 2, depth: extent * 2 };
+};
+
+export const layoutHexItems = (items, spacing) => {
+  const count = items.length;
+  if (!count) return { width: 0, depth: 0, columns: 0, rows: 0 };
+  const cols = Math.max(1, Math.ceil(Math.sqrt(count)));
+  const rows = Math.max(1, Math.ceil(count / cols));
+  const maxWidth = items.reduce((acc, item) => Math.max(acc, item.width || 0), 0);
+  const maxDepth = items.reduce((acc, item) => Math.max(acc, item.depth || 0), 0);
+  const cellWidth = maxWidth + spacing;
+  const cellDepth = maxDepth + spacing;
+  const rowStep = cellDepth * 0.86;
+  items.forEach((item, index) => {
+    const row = Math.floor(index / cols);
+    const col = index % cols;
+    const offset = (row % 2) * cellWidth * 0.5;
+    item.x = col * cellWidth + offset;
+    item.z = row * rowStep;
+  });
+  const totalWidth = cellWidth * cols + cellWidth * 0.5;
+  const totalDepth = rowStep * Math.max(1, rows - 1) + maxDepth;
+  return { width: totalWidth, depth: totalDepth, columns: cols, rows };
+};
+
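+// layoutFlowItems seeds positions on a grid, then runs a small force pass:
+// overlapping footprints repel, adjacency-linked items attract toward a
+// target distance, and damped, clamped velocities keep the motion stable.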
+export const layoutFlowItems = (items, spacing, adjacency, getKey) => {
+  const count = items.length;
+  if (!count) return { width: 0, depth: 0 };
+  const columns = Math.max(1, Math.ceil(Math.sqrt(count)));
+  layoutGridItems(items, columns, spacing);
+
+  const indexByKey = new Map(items.map((item, index) => [getKey(item), index]));
+  const neighbors = items.map(() => []);
+  items.forEach((item, index) => {
+    const key = getKey(item);
+    const adjacent = adjacency.get(key) || new Map();
+    for (const [targetKey, weight] of adjacent.entries()) {
+      const targetIndex = indexByKey.get(targetKey);
+      if (targetIndex === undefined) continue;
+      neighbors[index].push({ index: targetIndex, weight: weight || 1 });
+    }
+  });
+
+  const positions = items.map((item) => ({ x: item.x || 0, z: item.z || 0 }));
+  const velocities = items.map(() => ({ x: 0, z: 0 }));
+  const iterations = Math.min(80, 20 + count);
+  const repulse = 0.35;
+  const attract = 0.04;
+  const damping = 0.75;
+  const minSpacing = Math.max(0.6, spacing * 0.8);
+  const maxVelocity = Math.max(minSpacing, spacing * 1.2);
+
+  for (let iter = 0; iter < iterations; iter += 1) {
+    for (let i = 0; i < count; i += 1) {
+      let fx = 0;
+      let fz = 0;
+      const a = items[i];
+      const posA = positions[i];
+      for (let j = i + 1; j < count; j += 1) {
+        const b = items[j];
+        const posB = positions[j];
+        const dx = posB.x - posA.x;
+        const dz = posB.z - posA.z;
+        const dist = Math.sqrt(dx * dx + dz * dz) || 0.0001;
+        const target = (a.width + b.width) * 0.5 + minSpacing;
+        const overlap = target - dist;
+        if (overlap > 0) {
+          const push = overlap * repulse;
+          const rx = (dx / dist) * push;
+          const rz = (dz / dist) * push;
+          fx -= rx;
+          fz -= rz;
+          velocities[j].x += rx;
+          velocities[j].z += rz;
+        }
+      }
+      for (const neighbor of neighbors[i]) {
+        const b = items[neighbor.index];
+        const posB = positions[neighbor.index];
+        const dx = posB.x - posA.x;
+        const dz = posB.z - posA.z;
+        const dist = Math.sqrt(dx * dx + dz * dz) || 0.0001;
+        const target = (a.width + b.width) * 0.4 + spacing * 0.6;
+        // Keep attraction pulling toward the target distance to avoid runaway layouts.
+        const pull = (target - dist) * attract * Math.min(3, neighbor.weight || 1);
+        fx += (dx / dist) * pull;
+        fz += (dz / dist) * pull;
+      }
+      velocities[i].x = (velocities[i].x + fx) * damping;
+      velocities[i].z = (velocities[i].z + fz) * damping;
+      const speed = Math.hypot(velocities[i].x, velocities[i].z);
+      if (speed > maxVelocity) {
+        // Clamp velocity to avoid unstable layouts that can explode the bounds.
+        const scale = maxVelocity / speed;
+        velocities[i].x *= scale;
+        velocities[i].z *= scale;
+      }
+    }
+    for (let i = 0; i < count; i += 1) {
+      positions[i].x += velocities[i].x;
+      positions[i].z += velocities[i].z;
+    }
+  }
+
+  items.forEach((item, index) => {
+    item.x = positions[index].x;
+    item.z = positions[index].z;
+  });
+
+  let minX = Infinity;
+  let maxX = -Infinity;
+  let minZ = Infinity;
+  let maxZ = -Infinity;
+  items.forEach((item) => {
+    minX = Math.min(minX, item.x - item.width / 2);
+    maxX = Math.max(maxX, item.x + item.width / 2);
+    minZ = Math.min(minZ, item.z - item.depth / 2);
+    maxZ = Math.max(maxZ, item.z + item.depth / 2);
+  });
+  return { width: Math.max(0, maxX - minX), depth: Math.max(0, maxZ - minZ) };
+};
diff --git a/src/map/isometric/client/layout.js b/src/map/isometric/client/layout.js
new file mode 100644
index 000000000..abdd68891
--- /dev/null
+++ b/src/map/isometric/client/layout.js
@@ -0,0 +1,430 @@
+import { state } from './state.js';
+import { clamp, numberValue } from './utils.js';
+import {
+  resolveShape,
+  sizeFactor,
+  memberSizeFromRange,
+  groupKeyForPath,
+  scoreMember,
+  scoreToColor,
+  computeGrid,
+  buildSlots,
+  orderByAdjacency,
+  layoutGridItems,
+  layoutRadialItems,
+  layoutFlowItems,
+  layoutHexItems
+} from './layout-utils.js';
+
+export const createShapeGeometry = (shape) => {
+  const { THREE } = state;
+  const resolved = String(shape || 'square').toLowerCase();
+  state.shapeGeometryCache = state.shapeGeometryCache || new Map();
+  if (state.shapeGeometryCache.has(resolved)) {
+    return state.shapeGeometryCache.get(resolved);
+  }
+  const polygonMatch = resolved.match(/^(pentagon|hexagon|heptagon|octagon)(?:-(pyramid|frustum))?$/);
+  const polygonSides = {
+    pentagon: 5,
+    hexagon: 6,
+    heptagon: 7,
+    octagon: 8
+  };
+
+  let geometry;
+  if (resolved === 'circle') {
+    geometry = new THREE.CylinderGeometry(0.5, 0.5, 1, 32, 1, false);
+  } else if (resolved === 'pyramid') {
+    geometry = new THREE.CylinderGeometry(0, 0.55, 1, 4, 1, false);
+  } else if (polygonMatch) {
+    const [, polygon, variant] = polygonMatch;
+    const sides = polygonSides[polygon] || 6;
+    if (variant === 'pyramid') {
+      geometry = new THREE.CylinderGeometry(0, 0.55, 1, sides, 1, false);
+    } else if (variant === 'frustum') {
+      geometry = new THREE.CylinderGeometry(0.25, 0.55, 1, sides, 1, false);
+    } else {
+      geometry = new THREE.CylinderGeometry(0.5, 0.5, 1, sides, 1, false);
+    }
+  } else {
+    geometry = new THREE.BoxGeometry(1, 1, 1);
+  }
+  geometry.userData = { ...(geometry.userData || {}), shared: true };
+  state.shapeGeometryCache.set(resolved, geometry);
+  return geometry;
+};
+
+export const computeLayout = () => {
+  const {
+    THREE,
+    files,
+    edges,
+    layout,
+    layoutDefaults,
+    scoring,
+    colors,
+    scaleFactor,
+    fileByMember
+  } = state;
+
+  const groupDepth = Math.max(0, Math.floor(numberValue(layout.groupDepth, layoutDefaults.groupDepth)));
+  const baseSize =
numberValue(layout.baseSize, layoutDefaults.baseSize) * scaleFactor; + const fileHeight = numberValue(layout.fileHeight, layoutDefaults.fileHeight) * scaleFactor * 2; + const memberCell = numberValue(layout.memberCell, layoutDefaults.memberCell) * scaleFactor; + const memberGap = numberValue(layout.memberGap, layoutDefaults.memberGap) * scaleFactor; + const memberInset = numberValue(layout.memberInset, layoutDefaults.memberInset) * scaleFactor; + const fileSpacing = numberValue(layout.fileSpacing ?? layout.spacing, layoutDefaults.fileSpacing) * scaleFactor; + const groupSpacing = numberValue(layout.groupSpacing, layoutDefaults.groupSpacing) * scaleFactor; + const compactness = numberValue(layout.compactness, layoutDefaults.compactness); + const routingPadding = numberValue(layout.routingPadding, layoutDefaults.routingPadding) * scaleFactor; + const routingStep = numberValue(layout.routingStep, layoutDefaults.routingStep) * scaleFactor; + const labelScale = numberValue(layout.labelScale, layoutDefaults.labelScale) * scaleFactor; + const labelOffset = numberValue(layout.labelOffset, layoutDefaults.labelOffset) * scaleFactor; + const edgePlane = numberValue(layout.edgePlane, layoutDefaults.edgePlane) * scaleFactor; + const memberHeightBase = numberValue(layout.memberHeightBase, layoutDefaults.memberHeightBase) * scaleFactor; + const memberHeightScale = numberValue(layout.memberHeightScale, layoutDefaults.memberHeightScale) * scaleFactor; + const memberHeightMax = numberValue(layout.memberHeightMax, layoutDefaults.memberHeightMax) * scaleFactor; + + const edgeWeights = { + import: 3, + export: 3, + call: 2.5, + usage: 2, + dataflow: 2, + alias: 1.5 + }; + + const resolveEdgeFile = (endpoint) => { + if (!endpoint) return null; + if (endpoint.file) return endpoint.file; + if (endpoint.member) return fileByMember.get(endpoint.member) || null; + return null; + }; + + const fileAdjacency = new Map(); + const groupAdjacency = new Map(); + const touchAdjacency = (mapRef, from, to, weight) => { + if (!from || !to || from === to) return; + const bucket = mapRef.get(from) || new Map(); + bucket.set(to, (bucket.get(to) || 0) + weight); + mapRef.set(from, bucket); + }; + + const groupKeyByFile = new Map(); + const surfaceScaleForShape = (shape) => { + if (shape === 'pyramid') return 0.72; + if (shape?.endsWith('-pyramid')) return 0.72; + if (shape?.endsWith('-frustum')) return 0.82; + if (shape === 'circle') return 0.9; + return 1; + }; + + for (const node of files) { + const key = groupKeyForPath(node.path || node.name || '', groupDepth); + groupKeyByFile.set(node.path, key); + } + for (const edge of edges) { + const fromFile = resolveEdgeFile(edge.from); + const toFile = resolveEdgeFile(edge.to); + if (!fromFile || !toFile) continue; + const weight = edgeWeights[edge.type] || 1; + touchAdjacency(fileAdjacency, fromFile, toFile, weight); + touchAdjacency(fileAdjacency, toFile, fromFile, weight); + const fromGroup = groupKeyByFile.get(fromFile); + const toGroup = groupKeyByFile.get(toFile); + if (fromGroup && toGroup) { + touchAdjacency(groupAdjacency, fromGroup, toGroup, weight); + touchAdjacency(groupAdjacency, toGroup, fromGroup, weight); + } + } + + const groupsByKey = new Map(); + let maxMemberScore = 0; + let maxFileScore = 0; + + for (const node of files) { + const members = Array.isArray(node.members) ? 
node.members : []; + const membersWithMetrics = members.map((member) => { + const score = scoreMember(member, scoring); + const size = memberSizeFromRange(member.range); + maxMemberScore = Math.max(maxMemberScore, score); + const sizeScale = sizeFactor(size, 0.75, 0.12, 0.7, 1.35); + const scoreScale = sizeFactor(score, 0.65, 0.08, 0.8, 1.8); + return { + member, + score, + size, + footprintScale: clamp(sizeScale * scoreScale, 0.8, 3.2), + heightScale: clamp( + sizeFactor(size, 0.85, 0.18, 0.75, 1.6) * sizeFactor(score, 0.7, 0.08, 0.85, 2), + 0.8, + 2.6 + ) + }; + }); + const fileSize = membersWithMetrics.reduce((acc, entry) => acc + entry.size, 0) + || members.length + || 1; + const fileSizeScale = sizeFactor(fileSize, 0.8, 0.12, 0.75, 2.3); + const fileShape = resolveShape(layout.fileShape || layoutDefaults.fileShape, { + key: node.path || node.name, + category: node.category + }); + const grid = computeGrid(members.length); + const maxFootprintScale = membersWithMetrics.reduce((acc, entry) => Math.max(acc, entry.footprintScale), 1); + const cellSize = memberCell * maxFootprintScale; + const cellGap = memberGap * maxFootprintScale; + let width = baseSize; + let depth = baseSize; + if (members.length) { + width = Math.max(baseSize, grid.columns * cellSize + (grid.columns - 1) * cellGap + memberInset * 2); + depth = Math.max(baseSize, grid.rows * cellSize + (grid.rows - 1) * cellGap + memberInset * 2); + } + const fileScore = membersWithMetrics.reduce((acc, entry) => acc + entry.score, 0); + maxFileScore = Math.max(maxFileScore, fileScore); + const fileScoreScale = sizeFactor(fileScore, 0.85, 0.08, 0.85, 1.9); + width *= fileSizeScale * fileScoreScale; + depth *= fileSizeScale * fileScoreScale; + const fileHeightBoost = Math.min(6, Math.log1p(fileScore) * 0.35) * scaleFactor; + const fileHeightScale = sizeFactor(fileSize, 0.9, 0.08, 0.85, 1.5); + const fileComplexityScale = sizeFactor(fileScore, 0.85, 0.06, 0.9, 1.7); + const surfaceScale = surfaceScaleForShape(fileShape); + const surfaceWidth = width * surfaceScale; + const surfaceDepth = depth * surfaceScale; + const surfaceInset = memberInset * surfaceScale; + const fileHeightValue = (fileHeight + fileHeightBoost) * fileHeightScale * fileComplexityScale; + const fileLayout = { + node, + width, + depth, + height: fileHeightValue, + topY: fileHeightValue, + surfaceScale, + surfaceWidth, + surfaceDepth, + score: fileScore, + shape: fileShape, + columns: grid.columns, + rows: grid.rows, + cellSize, + cellGap, + slotStep: cellSize + cellGap, + memberSlots: buildSlots( + surfaceWidth, + surfaceDepth, + grid.columns, + grid.rows, + cellSize, + cellGap, + surfaceInset, + memberCell, + memberGap + ), + members: membersWithMetrics.map((entry) => { + const rawHeight = memberHeightBase + scoreMember(entry.member, scoring) * memberHeightScale; + const clampedHeight = Math.max(memberHeightBase, Math.min(memberHeightMax, rawHeight)); + return { + member: entry.member, + score: entry.score, + size: entry.size, + shape: resolveShape(layout.memberShape || layoutDefaults.memberShape, { + key: entry.member.id || entry.member.name, + type: entry.member.type + }), + footprint: memberCell * entry.footprintScale, + height: clampedHeight * entry.heightScale + }; + }) + }; + const key = groupKeyForPath(node.path || node.name || '', groupDepth); + const group = groupsByKey.get(key) || { key, files: [] }; + group.files.push(fileLayout); + groupsByKey.set(key, group); + } + + const layoutStyle = String(layout.style || layoutDefaults.style || 
'clustered').toLowerCase(); + const groups = orderByAdjacency( + Array.from(groupsByKey.values()), + (group) => group.key, + groupAdjacency + ); + + for (const group of groups) { + group.files = orderByAdjacency( + group.files, + (file) => file.node.path || file.node.name || '', + fileAdjacency + ); + if (layoutStyle === 'radial') { + const metrics = layoutRadialItems(group.files, fileSpacing); + group.width = Math.max(baseSize, metrics.width); + group.depth = Math.max(baseSize, metrics.depth); + } else if (layoutStyle === 'hex') { + const metrics = layoutHexItems(group.files, fileSpacing); + group.width = Math.max(baseSize, metrics.width); + group.depth = Math.max(baseSize, metrics.depth); + } else { + const columns = Math.max(1, Math.ceil(Math.sqrt(group.files.length || 1))); + const metrics = layoutGridItems(group.files, columns, fileSpacing); + group.width = Math.max(baseSize, metrics.width); + group.depth = Math.max(baseSize, metrics.depth); + } + } + + const allFiles = groups.flatMap((group) => group.files); + + if (layoutStyle === 'stream') { + const orderedFiles = orderByAdjacency( + allFiles, + (file) => file.node.path || file.node.name || '', + fileAdjacency + ); + let cursorX = 0; + let cursorZ = 0; + orderedFiles.forEach((fileLayout) => { + fileLayout.x = cursorX; + fileLayout.z = cursorZ; + cursorX += fileLayout.width + fileSpacing; + cursorZ += fileLayout.depth * 0.6 + fileSpacing * 0.6; + }); + } else if (layoutStyle === 'flat' || layoutStyle === 'grid') { + const orderedFiles = orderByAdjacency( + allFiles, + (file) => file.node.path || file.node.name || '', + fileAdjacency + ); + const columns = Math.max(1, Math.ceil(Math.sqrt(orderedFiles.length || 1))); + layoutGridItems(orderedFiles, columns, fileSpacing); + } else if (layoutStyle === 'radial') { + const groupRadii = groups.map((group) => Math.max(group.width || 0, group.depth || 0) / 2); + const maxGroupRadius = groupRadii.reduce((acc, value) => Math.max(acc, value), baseSize / 2); + const circumference = groupRadii.reduce((acc, value) => acc + (value * 2 + groupSpacing), 0); + const baseRadius = Math.max(maxGroupRadius * 2.2, circumference / (2 * Math.PI)); + let angle = 0; + groups.forEach((group, index) => { + const arc = (groupRadii[index] * 2 + groupSpacing) / baseRadius; + angle += arc / 2; + const offsetX = Math.cos(angle) * baseRadius; + const offsetZ = Math.sin(angle) * baseRadius; + for (const fileLayout of group.files) { + fileLayout.x += offsetX; + fileLayout.z += offsetZ; + } + angle += arc / 2; + }); + } else if (layoutStyle === 'flow') { + const orderedFiles = orderByAdjacency( + allFiles, + (file) => file.node.path || file.node.name || '', + fileAdjacency + ); + layoutFlowItems( + orderedFiles, + fileSpacing, + fileAdjacency, + (file) => file.node.path || file.node.name || '' + ); + } else if (layoutStyle === 'hex') { + const orderedFiles = orderByAdjacency( + allFiles, + (file) => file.node.path || file.node.name || '', + fileAdjacency + ); + layoutHexItems(orderedFiles, fileSpacing); + } else { + const groupCount = Math.max(1, groups.length); + const groupColumns = Math.ceil(Math.sqrt(groupCount)); + const groupLayouts = groups.map((group) => ({ + width: group.width || baseSize, + depth: group.depth || baseSize, + x: 0, + z: 0 + })); + layoutGridItems(groupLayouts, groupColumns, groupSpacing); + groups.forEach((group, index) => { + const offsetX = groupLayouts[index].x; + const offsetZ = groupLayouts[index].z; + for (const fileLayout of group.files) { + fileLayout.x += offsetX; + fileLayout.z += 
offsetZ; + } + }); + } + + let minX = 0; + let maxX = 0; + let minZ = 0; + let maxZ = 0; + if (allFiles.length) { + minX = Infinity; + maxX = -Infinity; + minZ = Infinity; + maxZ = -Infinity; + for (const fileLayout of allFiles) { + const left = fileLayout.x - fileLayout.width / 2; + const right = fileLayout.x + fileLayout.width / 2; + const back = fileLayout.z - fileLayout.depth / 2; + const front = fileLayout.z + fileLayout.depth / 2; + minX = Math.min(minX, left); + maxX = Math.max(maxX, right); + minZ = Math.min(minZ, back); + maxZ = Math.max(maxZ, front); + } + const centerX = (minX + maxX) / 2; + const centerZ = (minZ + maxZ) / 2; + for (const fileLayout of allFiles) { + fileLayout.x -= centerX; + fileLayout.z -= centerZ; + } + minX -= centerX; + maxX -= centerX; + minZ -= centerZ; + maxZ -= centerZ; + } + + if (Number.isFinite(compactness) && compactness > 0 && compactness !== 1) { + for (const fileLayout of allFiles) { + fileLayout.x *= compactness; + fileLayout.z *= compactness; + } + minX *= compactness; + maxX *= compactness; + minZ *= compactness; + maxZ *= compactness; + } + + const spanX = Math.max(40, maxX - minX); + const spanZ = Math.max(40, maxZ - minZ); + const maxSpan = Math.max(spanX, spanZ); + + Object.assign(state, { + layoutStyle, + layoutMetrics: { + groupDepth, + baseSize, + fileHeight, + memberCell, + memberGap, + memberInset, + fileSpacing, + groupSpacing, + compactness, + routingPadding, + routingStep, + labelScale, + labelOffset, + edgePlane + }, + edgeWeights, + groupKeyByFile, + fileAdjacency, + groupAdjacency, + groups, + allFiles, + maxMemberScore, + maxFileScore, + bounds: { minX, maxX, minZ, maxZ, spanX, spanZ, maxSpan }, + resolveShape, + scoreToColor: (score, key) => scoreToColor(score, maxMemberScore, colors, THREE, key) + }); +}; diff --git a/src/map/isometric/client/map-data.js b/src/map/isometric/client/map-data.js new file mode 100644 index 000000000..246dcaef1 --- /dev/null +++ b/src/map/isometric/client/map-data.js @@ -0,0 +1,47 @@ +import { state } from './state.js'; + +const buildMemberKey = (filePath, name, range) => { + const start = Number.isFinite(range?.startLine) ? range.startLine : ''; + const end = Number.isFinite(range?.endLine) ? range.endLine : ''; + return `${filePath}::${name || ''}:${start}-${end}`; +}; + +const buildMemberNameKey = (filePath, name) => `${filePath}::${name || ''}`; + +export const initMapData = () => { + const files = Array.isArray(state.map?.nodes) ? state.map.nodes : []; + const edges = Array.isArray(state.map?.edges) ? state.map.edges : []; + const nodeByPath = new Map(); + const nodeById = new Map(); + const memberById = new Map(); + const memberByKey = new Map(); + const fileByMember = new Map(); + + for (const node of files) { + if (node.path) nodeByPath.set(node.path, node); + if (node.name && !nodeByPath.has(node.name)) nodeByPath.set(node.name, node); + if (node.id) nodeById.set(node.id, node); + const members = Array.isArray(node.members) ? 
node.members : []; + for (const member of members) { + if (member?.id) memberById.set(member.id, member); + const filePath = member?.file || node.path || node.name || ''; + if (member?.id) fileByMember.set(member.id, filePath); + const rangeKey = buildMemberKey(filePath, member?.name || '', member?.range || {}); + memberByKey.set(rangeKey, member); + const nameKey = buildMemberNameKey(filePath, member?.name || ''); + if (!memberByKey.has(nameKey)) memberByKey.set(nameKey, member); + } + } + + Object.assign(state, { + files, + edges, + nodeByPath, + nodeById, + memberById, + memberByKey, + fileByMember, + buildMemberKey, + buildMemberNameKey + }); +}; diff --git a/src/map/isometric/client/materials.js b/src/map/isometric/client/materials.js new file mode 100644 index 000000000..736362d34 --- /dev/null +++ b/src/map/isometric/client/materials.js @@ -0,0 +1,507 @@ +import { state } from './state.js'; +import { clamp, numberValue } from './utils.js'; + +const getWireGeometry = (geometry, THREE) => { + const cache = state.wireGeometryCache || (state.wireGeometryCache = new Map()); + const key = geometry?.uuid || geometry; + if (cache.has(key)) return cache.get(key); + const wireGeom = new THREE.EdgesGeometry(geometry); + wireGeom.userData = { ...(wireGeom.userData || {}), shared: true }; + cache.set(key, wireGeom); + return wireGeom; +}; + +export const initMaterials = () => { + const { THREE, assets, visuals } = state; + state.glowMaterials = []; + state.flowMaterials = []; + state.glassMaterials = []; + state.labelMaterials = []; + state.glassShells = []; + state.wireMaterials = []; + state.gridLineMaterials = []; + state.normalMapState = { texture: null }; + + if (assets.normalMapUrl) { + const loader = new THREE.TextureLoader(); + loader.load(assets.normalMapUrl, (texture) => { + texture.wrapS = THREE.RepeatWrapping; + texture.wrapT = THREE.RepeatWrapping; + texture.repeat.set(visuals.glass.normalRepeat, visuals.glass.normalRepeat); + state.normalMapState.texture = texture; + applyGlassSettings(); + }); + } +}; + +export const applyHeightFog = (material) => { + const { visuals } = state; + if (!material || material.userData?.heightFogApplied) return; + material.userData.heightFogApplied = true; + const fogVarying = 'vIsoWorldPosition'; + const previousCompile = material.onBeforeCompile; + material.onBeforeCompile = (shader) => { + if (typeof previousCompile === 'function') { + previousCompile(shader); + } + shader.uniforms.fogHeight = { value: visuals.fogHeight }; + shader.uniforms.fogHeightRange = { value: visuals.fogHeightRange }; + shader.uniforms.fogHeightEnabled = { value: visuals.enableHeightFog ? 
1 : 0 };
+    const fogUniformsSnippet = [
+      '#include <fog_pars_fragment>',
+      ' uniform float fogHeight;',
+      ' uniform float fogHeightRange;',
+      ' uniform float fogHeightEnabled;'
+    ].join('\n');
+    const heightExpr =
+      ` float heightFactor = fogHeightEnabled * clamp((fogHeight - ${fogVarying}.y) / `
+      + 'max(0.001, fogHeightRange), 0.0, 1.0);';
+    const fogFragmentSnippet = [
+      '#ifdef USE_FOG',
+      ' float fogFactor = smoothstep(fogNear, fogFar, vFogDepth);',
+      heightExpr,
+      ' float combinedFog = max(fogFactor, heightFactor);',
+      ' gl_FragColor.rgb = mix(gl_FragColor.rgb, fogColor, combinedFog);',
+      '#endif'
+    ].join('\n');
+    // Patch the stock shader chunks: declare the world-position varying, add the
+    // height uniforms, then swap the fog mix for one that includes heightFactor.
+    if (!shader.vertexShader.includes(`varying vec3 ${fogVarying}`)) {
+      if (shader.vertexShader.includes('#include <common>')) {
+        shader.vertexShader = shader.vertexShader.replace(
+          '#include <common>',
+          `#include <common>\n varying vec3 ${fogVarying};`
+        );
+      }
+    }
+    if (shader.vertexShader.includes('#include <begin_vertex>')) {
+      shader.vertexShader = shader.vertexShader.replace(
+        '#include <begin_vertex>',
+        `#include <begin_vertex>\n ${fogVarying} = (modelMatrix * vec4(position, 1.0)).xyz;`
+      );
+    }
+    if (!shader.fragmentShader.includes(`varying vec3 ${fogVarying}`)) {
+      if (shader.fragmentShader.includes('#include <common>')) {
+        shader.fragmentShader = shader.fragmentShader.replace(
+          '#include <common>',
+          `#include <common>\n varying vec3 ${fogVarying};`
+        );
+      }
+    }
+    if (!shader.fragmentShader.includes('uniform float fogHeight')) {
+      if (shader.fragmentShader.includes('#include <fog_pars_fragment>')) {
+        shader.fragmentShader = shader.fragmentShader.replace(
+          '#include <fog_pars_fragment>',
+          fogUniformsSnippet
+        );
+      }
+    }
+    if (shader.fragmentShader.includes('#include <fog_fragment>')) {
+      shader.fragmentShader = shader.fragmentShader.replace(
+        '#include <fog_fragment>',
+        fogFragmentSnippet
+      );
+    }
+    material.userData.fogUniforms = shader.uniforms;
+  };
+  material.needsUpdate = true;
+};
+
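+// Glass look: MeshPhysicalMaterial with transmission for the outer surface,
+// paired with a slightly shrunken back-side shell (createGlassShell) so walls
+// read as having thickness.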
+export const createGlassMaterial = (color, opacity) => {
+  const { THREE, visuals, normalMapState, glassMaterials, glowMaterials } = state;
+  const glass = visuals.glass || state.visualDefaults.glass;
+  const transmission = clamp(glass.transmission ?? 0, 0, 1);
+  const material = new THREE.MeshPhysicalMaterial({
+    color,
+    metalness: glass.metalness,
+    roughness: glass.roughness,
+    transmission,
+    ior: glass.ior,
+    reflectivity: glass.reflectivity,
+    thickness: glass.thickness,
+    envMapIntensity: glass.envMapIntensity,
+    clearcoat: glass.clearcoat,
+    clearcoatRoughness: glass.clearcoatRoughness,
+    transparent: true,
+    opacity,
+    depthWrite: false,
+    side: THREE.DoubleSide
+  });
+  material.attenuationDistance = transmission > 0 ? 9999 : 0;
+  material.emissive = color.clone().multiplyScalar(0.25);
+  material.emissiveIntensity = 0.4;
+  material.userData = {
+    glowBase: 0.4,
+    glowRange: 0.3,
+    baseColor: color.clone(),
+    baseEmissive: material.emissive.clone(),
+    baseEmissiveIntensity: material.emissiveIntensity,
+    baseOpacity: opacity
+  };
+  if (normalMapState.texture) {
+    material.normalMap = normalMapState.texture;
+    material.clearcoatNormalMap = normalMapState.texture;
+    material.normalScale = new THREE.Vector2(glass.normalScale, glass.normalScale);
+    material.clearcoatNormalScale = new THREE.Vector2(glass.clearcoatNormalScale, glass.clearcoatNormalScale);
+  }
+  glassMaterials.push(material);
+  glowMaterials.push(material);
+  applyHeightFog(material);
+  return material;
+};
+
+export const createGlassShell = (geometry, material) => {
+  const { THREE, visuals, glassMaterials, glowMaterials, glassShells } = state;
+  const outer = new THREE.Mesh(geometry, material);
+  const innerMaterial = material.clone();
+  innerMaterial.side = THREE.BackSide;
+  innerMaterial.opacity = clamp(material.opacity * 0.9, 0.05, 1);
+  innerMaterial.userData = {
+    ...(material.userData || {}),
+    baseEmissive: material.emissive.clone(),
+    baseEmissiveIntensity: material.emissiveIntensity,
+    baseOpacity: innerMaterial.opacity
+  };
+  glassMaterials.push(innerMaterial);
+  glowMaterials.push(innerMaterial);
+  applyHeightFog(innerMaterial);
+  const inner = new THREE.Mesh(geometry, innerMaterial);
+  const thicknessScale = clamp(1 - visuals.glass.thickness * 0.03, 0.75, 0.98);
+  inner.scale.set(thicknessScale, thicknessScale, thicknessScale);
+  const group = new THREE.Group();
+  group.add(outer);
+  group.add(inner);
+  glassShells.push({ inner, outer });
+  return { group, outer, inner };
+};
+
+export const configureWireMaterial = (wireMat) => {
+  const { visuals, visualDefaults, scaleFactor } = state;
+  const thickness = numberValue(visuals.wireframeThickness, visualDefaults.wireframeThickness) * (scaleFactor || 1);
+  const glow = numberValue(visuals.wireframeGlow, visualDefaults.wireframeGlow);
+  const baseColor = wireMat.userData?.baseColor || wireMat.color;
+  const emissiveColor = wireMat.userData?.emissiveColor || baseColor;
+  wireMat.opacity = clamp(0.02 + glow * 0.22, 0.02, 0.8);
+  if ('linewidth' in wireMat) {
+    wireMat.linewidth = clamp(thickness, 0.01, 12);
+    wireMat.userData.baseLinewidth = wireMat.linewidth;
+  }
+  wireMat.color.copy(emissiveColor);
+  wireMat.userData.glowBase = 0.03 + glow * 0.2;
+  wireMat.userData.glowRange = 0.05 + glow * 0.35;
+  wireMat.userData.flowSpeed = numberValue(visuals.wirePulseSpeed, visualDefaults.wirePulseSpeed);
+  if ('toneMapped' in wireMat) wireMat.toneMapped = false;
+};
+
+export const createWireframe = (geometry, color, phase) => {
+  const {
+    THREE,
+    LineMaterial,
+    LineSegments2,
+    LineSegmentsGeometry,
+    lineResolution,
+    wireMaterials
+  } = state;
+  const wireGeom = getWireGeometry(geometry, THREE);
+  let wireMat;
+  if (LineMaterial && LineSegments2 && LineSegmentsGeometry) {
+    wireMat = new LineMaterial({
+      color,
+      transparent: true,
+      opacity: 0.2,
+      linewidth: 1,
+      blending: THREE.AdditiveBlending,
+      depthWrite: false,
+      depthTest: false
+    });
+    wireMat.worldUnits = true;
+    wireMat.resolution.set(lineResolution.width, lineResolution.height);
+  } else {
+    wireMat = new THREE.LineBasicMaterial({
+      color,
+      transparent: true,
+      opacity: 0.2,
+      linewidth: 1,
+      blending: THREE.AdditiveBlending,
+      depthWrite: false,
+      depthTest: false
+    });
+  }
+  const emissiveColor = color.clone().lerp(new
THREE.Color(0xffffff), 0.55); + wireMat.userData = { + glowBase: 0.18, + glowRange: 0.25, + flowPhase: phase || 0, + baseColor: color.clone(), + emissiveColor: emissiveColor.clone() + }; + configureWireMaterial(wireMat); + wireMaterials.push(wireMat); + if (LineSegments2 && LineSegmentsGeometry && wireMat instanceof LineMaterial) { + const lineGeom = new LineSegmentsGeometry(); + lineGeom.setPositions(wireGeom.attributes.position.array); + const line = new LineSegments2(lineGeom, wireMat); + line.computeLineDistances(); + return line; + } + return new THREE.LineSegments(wireGeom, wireMat); +}; + +export const createTextPlane = (text, options = {}) => { + const { THREE, labelMaterials } = state; + const size = Number.isFinite(options.size) ? options.size : 0; + const maxTextureSize = 1024; + const baseFontSize = Math.max(20, Math.round(220 * (size || 1))); + const canvas = document.createElement('canvas'); + const context = canvas.getContext('2d'); + const measure = (fontPx) => { + context.font = `600 ${fontPx}px "Segoe UI", sans-serif`; + const paddingPx = Math.round(fontPx * 0.2); + const metrics = context.measureText(text); + const widthPx = Math.ceil(metrics.width + paddingPx * 2); + const heightPx = Math.ceil(fontPx + paddingPx * 2); + return { fontPx, paddingPx, widthPx, heightPx }; + }; + let { fontPx, paddingPx, widthPx, heightPx } = measure(baseFontSize); + const scaleDown = Math.min(1, maxTextureSize / Math.max(widthPx, heightPx)); + if (scaleDown < 1) { + ({ fontPx, paddingPx, widthPx, heightPx } = measure(Math.max(10, Math.floor(baseFontSize * scaleDown)))); + } + canvas.width = Math.min(maxTextureSize, widthPx); + canvas.height = Math.min(maxTextureSize, heightPx); + context.clearRect(0, 0, canvas.width, canvas.height); + context.font = `600 ${fontPx}px "Segoe UI", sans-serif`; + context.fillStyle = options.color || '#e7eef8'; + context.textBaseline = 'middle'; + context.textAlign = 'left'; + context.fillText(text, paddingPx, canvas.height / 2); + const texture = new THREE.CanvasTexture(canvas); + texture.needsUpdate = true; + const material = new THREE.MeshBasicMaterial({ + color: 0xffffff, + transparent: true, + opacity: options.opacity ?? 0.9, + side: THREE.DoubleSide, + depthWrite: false, + map: texture + }); + if ('toneMapped' in material) material.toneMapped = false; + material.userData = { baseOpacity: material.opacity }; + applyHeightFog(material); + labelMaterials.push(material); + const plane = new THREE.Mesh(new THREE.PlaneGeometry(canvas.width / 100, canvas.height / 100), material); + plane.userData = { labelTexture: texture }; + return plane; +}; + +export const applyGlassSettings = () => { + const { + THREE, + visuals, + visualDefaults, + glassMaterials, + glassShells, + normalMapState, + flowMaterials, + grid, + edgeDotMaterial + } = state; + const glass = visuals.glass || visualDefaults.glass; + const transmission = clamp(glass.transmission ?? 0, 0, 1); + for (const material of glassMaterials) { + material.metalness = glass.metalness; + material.roughness = glass.roughness; + material.transmission = transmission; + material.ior = glass.ior; + material.reflectivity = glass.reflectivity; + material.thickness = glass.thickness; + material.attenuationDistance = transmission > 0 ? 
9999 : 0; + material.envMapIntensity = glass.envMapIntensity; + material.clearcoat = glass.clearcoat; + material.clearcoatRoughness = glass.clearcoatRoughness; + if (normalMapState.texture) { + material.normalScale = new THREE.Vector2(glass.normalScale, glass.normalScale); + material.clearcoatNormalScale = new THREE.Vector2(glass.clearcoatNormalScale, glass.clearcoatNormalScale); + } + if (material.userData?.fogUniforms) { + material.userData.fogUniforms.fogHeight.value = visuals.fogHeight; + material.userData.fogUniforms.fogHeightRange.value = visuals.fogHeightRange; + if ('fogHeightEnabled' in material.userData.fogUniforms) { + material.userData.fogUniforms.fogHeightEnabled.value = visuals.enableHeightFog ? 1 : 0; + } + } + material.needsUpdate = true; + } + for (const material of flowMaterials) { + if ('envMapIntensity' in material) { + material.envMapIntensity = glass.envMapIntensity; + } + material.needsUpdate = true; + } + if (edgeDotMaterial && 'envMapIntensity' in edgeDotMaterial) { + edgeDotMaterial.envMapIntensity = glass.envMapIntensity; + edgeDotMaterial.needsUpdate = true; + } + if (grid?.material && 'envMapIntensity' in grid.material) { + grid.material.envMapIntensity = glass.envMapIntensity; + grid.material.needsUpdate = true; + } + const thicknessScale = clamp(1 - glass.thickness * 0.03, 0.75, 0.98); + for (const shell of glassShells) { + if (shell?.inner) shell.inner.scale.set(thicknessScale, thicknessScale, thicknessScale); + } +}; + +export const updateFileOpacity = () => { + const { visuals, visualDefaults, fileMeshes, fileChunkMeshes } = state; + const baseOpacity = clamp(numberValue(visuals.fileOpacity, visualDefaults.fileOpacity), 0.1, 1); + for (const mesh of [...fileMeshes, ...fileChunkMeshes]) { + const offset = mesh.userData?.opacityOffset ?? 0; + const opacity = clamp(baseOpacity + offset, 0.1, 1); + if (mesh.material) { + mesh.material.opacity = opacity; + if (mesh.material.userData) mesh.material.userData.baseOpacity = opacity; + } + const inner = mesh.userData?.shellInner; + if (inner?.material) { + const innerOpacity = clamp(opacity * 0.9, 0.05, 1); + inner.material.opacity = innerOpacity; + if (inner.material.userData) inner.material.userData.baseOpacity = innerOpacity; + } + } +}; + +export const updateMemberOpacity = () => { + const { visuals, visualDefaults, memberMeshes, chunkMeshes } = state; + const baseOpacity = clamp(numberValue(visuals.memberOpacity, visualDefaults.memberOpacity), 0.1, 1); + for (const mesh of [...memberMeshes, ...chunkMeshes]) { + const offset = mesh.userData?.opacityOffset ?? 
0; + const opacity = clamp(baseOpacity + offset, 0.1, 1); + if (mesh.material) { + mesh.material.opacity = opacity; + if (mesh.material.userData) mesh.material.userData.baseOpacity = opacity; + } + const inner = mesh.userData?.shellInner; + if (inner?.material) { + const innerOpacity = clamp(opacity * 0.9, 0.05, 1); + inner.material.opacity = innerOpacity; + if (inner.material.userData) inner.material.userData.baseOpacity = innerOpacity; + } + } +}; + +export const updateWireframes = () => { + const { wireMaterials, lineResolution } = state; + for (const material of wireMaterials) { + configureWireMaterial(material); + if (material.resolution && typeof material.resolution.set === 'function') { + material.resolution.set(lineResolution.width, lineResolution.height); + } + material.needsUpdate = true; + } +}; + +export const updateFlowGlow = () => { + const { flowMaterials, visuals } = state; + for (const material of flowMaterials) { + material.emissiveIntensity = visuals.flowGlowBase; + material.userData.glowBase = visuals.flowGlowBase; + material.userData.glowRange = visuals.flowGlowRange; + material.userData.baseEmissiveIntensity = visuals.flowGlowBase; + } +}; + +export const updateGridGlow = () => { + const { visuals, visualDefaults, gridLineMaterials, lineResolution } = state; + const base = numberValue(visuals.gridGlowBase, visualDefaults.gridGlowBase); + const range = numberValue(visuals.gridGlowRange, visualDefaults.gridGlowRange); + const thickness = numberValue(visuals.gridLineThickness, visualDefaults.gridLineThickness); + for (const material of gridLineMaterials) { + material.opacity = clamp(base + range * 0.5, 0.05, 0.9); + material.userData.glowBase = base; + material.userData.glowRange = range; + material.userData.flowSpeed = numberValue(visuals.gridPulseSpeed, visualDefaults.gridPulseSpeed); + if ('linewidth' in material) { + material.linewidth = clamp(thickness, 0.02, 6); + } + if (material.resolution && typeof material.resolution.set === 'function') { + material.resolution.set(lineResolution.width, lineResolution.height); + } + } +}; + +export const updateFog = (maxSpanOverride) => { + const { + fogBounds, + visuals, + visualDefaults, + scene, + THREE, + glassMaterials, + labelMaterials, + flowMaterials, + wireMaterials, + gridLineMaterials, + grid, + edgeDotMaterial + } = state; + if (Number.isFinite(maxSpanOverride)) { + fogBounds.maxSpan = maxSpanOverride; + } + const maxSpan = fogBounds.maxSpan || 120; + const enableFog = visuals.enableFog === true; + const fogMaterials = [ + ...glassMaterials, + ...labelMaterials, + ...flowMaterials, + ...wireMaterials, + ...gridLineMaterials, + ...(edgeDotMaterial ? 
[edgeDotMaterial] : []) + ]; + if (!enableFog) { + scene.fog = null; + if (state.fogEnabled !== enableFog) { + state.fogEnabled = enableFog; + fogMaterials.forEach((material) => { + if (material) material.needsUpdate = true; + }); + } + return; + } + const colorValue = visuals.fogColor || visualDefaults.fogColor; + const fogColor = new THREE.Color(colorValue); + const distance = numberValue(visuals.fogDistance, visualDefaults.fogDistance); + const fogNear = maxSpan * 0.9; + const fogFar = maxSpan * Math.max(1.1, distance); + scene.fog = new THREE.Fog(fogColor.getHex(), fogNear, fogFar); + if (state.fogEnabled !== enableFog) { + state.fogEnabled = enableFog; + fogMaterials.forEach((material) => { + if (material) material.needsUpdate = true; + }); + } + const updateFogUniforms = (material) => { + if (!material?.userData?.fogUniforms) return; + material.userData.fogUniforms.fogHeight.value = visuals.fogHeight; + material.userData.fogUniforms.fogHeightRange.value = visuals.fogHeightRange; + if ('fogHeightEnabled' in material.userData.fogUniforms) { + material.userData.fogUniforms.fogHeightEnabled.value = visuals.enableHeightFog ? 1 : 0; + } + }; + fogMaterials.forEach(updateFogUniforms); + if (grid?.material) updateFogUniforms(grid.material); +}; + +export const updateFlowLights = () => { + const { visuals, flowLights } = state; + const enabled = visuals.enableFlowLights !== false; + for (const light of flowLights) { + light.visible = enabled; + } +}; + +export const updateExtraLights = () => { + const { visuals, extraLights } = state; + const enabled = visuals.enableExtraLights !== false; + for (const light of extraLights) { + light.visible = enabled; + } +}; diff --git a/src/map/isometric/client/meshes.js b/src/map/isometric/client/meshes.js new file mode 100644 index 000000000..0d95858f3 --- /dev/null +++ b/src/map/isometric/client/meshes.js @@ -0,0 +1,329 @@ +import { state } from './state.js'; +import { clamp, hashString } from './utils.js'; +import { createGlassMaterial, createGlassShell, createTextPlane, createWireframe } from './materials.js'; +import { createShapeGeometry } from './layout.js'; + +const colorPalette = { + source: 0x2980b9, + test: 0x8e44ad, + config: 0x16a085, + docs: 0xd35400, + generated: 0x7f8c8d, + dir: 0x34495e, + other: 0x2c3e50 +}; + +export const buildMeshes = () => { + const { + THREE, + visuals, + allFiles, + layoutMetrics, + fileGroup, + memberGroup, + labelGroup, + wireGroup + } = state; + + const { labelOffset, memberCell } = layoutMetrics; + const labelsEnabled = Boolean(labelGroup?.visible); + const chunkInstances = []; + const fileChunkInstances = []; + + const colorFromKey = (value, saturation = 0.65, lightness = 0.55) => { + if (!value) return null; + const seed = hashString(value); + const hue = (seed % 360) / 360; + return new THREE.Color().setHSL(hue, saturation, lightness); + }; + + for (const fileLayout of allFiles) { + const node = fileLayout.node; + const geometry = createShapeGeometry(fileLayout.shape); + const languageKey = node.language || node.type || node.ext || node.category || node.name; + const fileColor = colorFromKey(languageKey, 0.68, 0.52) + || new THREE.Color(colorPalette[node.category] || colorPalette.other); + const fileOpacity = Math.max(0.1, Math.min(1, visuals.fileOpacity)); + const material = createGlassMaterial(fileColor, fileOpacity); + const shell = createGlassShell(geometry, material); + const mesh = shell.outer; + shell.group.position.set(fileLayout.x, fileLayout.height / 2, fileLayout.z); + 
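// The shape geometry is unit-sized, so the group scale below stretches it to this file's width/height/depth. +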
shell.group.scale.set(fileLayout.width, fileLayout.height, fileLayout.depth); + const fileTopY = Number.isFinite(fileLayout.topY) ? fileLayout.topY : fileLayout.height; + mesh.castShadow = true; + mesh.receiveShadow = true; + mesh.userData = { + type: 'file', + file: node.path || node.name, + name: node.name, + id: node.id || null, + range: null, + baseColor: fileColor.clone(), + shellInner: shell.inner, + shellGroup: shell.group + }; + fileGroup.add(shell.group); + state.fileMeshes.push(mesh); + state.glowMaterials.push(material); + const fileWireColor = fileColor.clone(); + const fileWire = createWireframe(geometry, fileWireColor, shell.group.position.x + shell.group.position.z); + fileWire.position.copy(shell.group.position); + fileWire.rotation.copy(shell.group.rotation); + fileWire.scale.copy(shell.group.scale); + wireGroup.add(fileWire); + state.wireByMesh.set(mesh, fileWire); + const fileKey = node.path || node.name; + if (fileKey) { + state.fileAnchors.set(fileKey, { x: shell.group.position.x, y: fileTopY, z: shell.group.position.z }); + state.fileColorByPath.set(fileKey, fileColor.clone()); + state.fileMeshByKey.set(fileKey, mesh); + } + + const fileChunkCount = clamp(Math.ceil(Math.sqrt(fileLayout.members.length || 1)), 1, 8); + if (fileChunkCount > 0) { + const innerWidth = fileLayout.width * 0.6; + const innerDepth = fileLayout.depth * 0.6; + const cols = Math.max(1, Math.ceil(Math.sqrt(fileChunkCount))); + const rows = Math.max(1, Math.ceil(fileChunkCount / cols)); + const stepX = innerWidth / cols; + const stepZ = innerDepth / rows; + const startX = fileLayout.x - innerWidth / 2 + stepX / 2; + const startZ = fileLayout.z - innerDepth / 2 + stepZ / 2; + for (let i = 0; i < fileChunkCount; i += 1) { + const row = Math.floor(i / cols); + const col = i % cols; + const seed = hashString(`${fileKey || node.name || 'file'}:${i}`); + const t = (seed % 1000) / 1000; + const heightScale = 0.6 + t * 0.6; + const footprintScale = 0.45 + ((seed >> 6) % 100) / 250; + const chunkHeight = Math.max(0.12, fileLayout.height * 0.08 * heightScale); + const chunkFootprint = Math.min(stepX, stepZ) * footprintScale; + const innerBottom = fileLayout.height * 0.12; + const innerTop = fileLayout.height * 0.65; + const centerY = Math.min( + innerTop - chunkHeight / 2, + innerBottom + (innerTop - innerBottom) * t + ); + const chunkColor = fileColor.clone().offsetHSL(0.05 * t, 0.08, 0.08); + fileChunkInstances.push({ + x: startX + col * stepX, + y: centerY, + z: startZ + row * stepZ, + scaleX: chunkFootprint, + scaleY: chunkHeight, + scaleZ: chunkFootprint, + color: chunkColor + }); + } + } + + const fileLabelText = String(node.name || node.path || '').split('/').filter(Boolean).pop(); + if (labelsEnabled && fileLabelText) { + const fileLabelSize = Math.min(fileLayout.width, fileLayout.depth); + const fileLabel = createTextPlane(fileLabelText, { size: fileLabelSize }); + if (fileLabel.material) fileLabel.material.depthTest = true; + fileLabel.position.set( + shell.group.position.x + fileLayout.width * 0.5 + labelOffset, + Math.max(0.3, fileTopY * 0.6), + shell.group.position.z + fileLayout.depth * 0.5 + labelOffset + ); + fileLabel.rotation.y = -Math.PI / 4; + fileLabel.renderOrder = 4; + labelGroup.add(fileLabel); + } + + const slots = fileLayout.memberSlots; + if (!slots.length) continue; + const members = fileLayout.members.slice().sort((a, b) => { + const footprintDiff = (b.footprint || 0) - (a.footprint || 0); + if (footprintDiff !== 0) return footprintDiff; + return (b.height || 0) - (a.height || 
0); + }); + const slotStep = fileLayout.slotStep || (layoutMetrics.memberCell + layoutMetrics.memberGap); + const slotLimit = Math.max(0.2, slotStep - layoutMetrics.memberGap * 0.6); + const maxFootprint = Math.min( + (fileLayout.surfaceWidth || fileLayout.width) / Math.max(1, fileLayout.columns || 1), + (fileLayout.surfaceDepth || fileLayout.depth) / Math.max(1, fileLayout.rows || 1), + slotLimit + ) - layoutMetrics.memberGap; + members.forEach((entry, index) => { + const slot = slots[index]; + if (!slot) return; + const height = entry.height; + const rawFootprint = entry.footprint || memberCell; + const footprint = Math.max(0.2, Math.min(rawFootprint, maxFootprint || rawFootprint)); + const mGeom = createShapeGeometry(entry.shape); + const memberKey = entry.member.id || entry.member.name || entry.member.file || ''; + const mColor = state.scoreToColor(entry.score, memberKey); + if (entry.member.id) state.memberColorById.set(entry.member.id, mColor.clone()); + const memberOpacity = Math.max(0.1, Math.min(1, visuals.memberOpacity)); + const mMat = createGlassMaterial(mColor, memberOpacity); + const shellMember = createGlassShell(mGeom, mMat); + const mMesh = shellMember.outer; + const anchorY = Number.isFinite(fileLayout.topY) ? fileLayout.topY : fileLayout.height; + const lift = Math.max(0.06, layoutMetrics.memberGap * 0.35); + shellMember.group.position.set(fileLayout.x + slot.x, anchorY + height / 2 + lift, fileLayout.z + slot.z); + shellMember.group.scale.set(footprint, height, footprint); + mMesh.castShadow = false; + mMesh.receiveShadow = false; + mMesh.userData = { + type: 'member', + file: node.path || node.name, + id: entry.member.id || null, + name: entry.member.name, + range: entry.member.range || null, + baseColor: mColor.clone(), + shellInner: shellMember.inner, + shellGroup: shellMember.group + }; + memberGroup.add(shellMember.group); + state.memberMeshes.push(mMesh); + state.glowMaterials.push(mMat); + const memberWireColor = mColor.clone(); + const memberWire = createWireframe( + mGeom, + memberWireColor, + shellMember.group.position.x + shellMember.group.position.z + ); + memberWire.position.copy(shellMember.group.position); + memberWire.rotation.copy(shellMember.group.rotation); + memberWire.scale.copy(shellMember.group.scale); + wireGroup.add(memberWire); + state.wireByMesh.set(mMesh, memberWire); + if (entry.member.id) { + state.memberAnchors.set(entry.member.id, { + x: shellMember.group.position.x, + y: shellMember.group.position.y + height / 2, + z: shellMember.group.position.z + }); + state.memberMeshById.set(entry.member.id, mMesh); + } + if (labelsEnabled && entry.member.name) { + const memberLabelSize = Math.min(footprint, height); + const memberLabel = createTextPlane(entry.member.name, { size: memberLabelSize }); + if (memberLabel.material) memberLabel.material.depthTest = true; + memberLabel.position.set( + shellMember.group.position.x + footprint * 0.5 + labelOffset, + shellMember.group.position.y, + shellMember.group.position.z + footprint * 0.5 + labelOffset + ); + memberLabel.rotation.y = -Math.PI / 4; + memberLabel.renderOrder = 4; + labelGroup.add(memberLabel); + } + + const dataflow = entry.member.dataflow || {}; + const controlFlow = entry.member.controlFlow || {}; + const flowCount = [ + dataflow.reads, + dataflow.writes, + dataflow.mutations, + dataflow.aliases + ].reduce((acc, value) => acc + (Array.isArray(value) ? 
value.length : 0), 0); + const controlCount = Object.values(controlFlow).reduce((acc, value) => { + if (Array.isArray(value)) return acc + value.length; + if (typeof value === 'number') return acc + value; + if (value) return acc + 1; + return acc; + }, 0); + const chunkCount = clamp(Math.ceil(Math.sqrt(flowCount + controlCount + 1)), 1, 6); + const footprintScale = footprint / memberCell; + let chunkHeight = Math.max(0.08, height * clamp(0.1 + footprintScale * 0.02, 0.1, 0.18)); + const chunkFootprintScale = clamp(0.55 + footprintScale * 0.15, 0.6, 0.95); + const chunkFootprint = Math.min(footprint, footprint * chunkFootprintScale); + let chunkGap = Math.max(0.02, chunkHeight * 0.12); + const maxStackHeight = height * 0.55; + const stackHeight = chunkCount * chunkHeight + (chunkCount - 1) * chunkGap; + if (stackHeight > maxStackHeight && stackHeight > 0) { + const scale = maxStackHeight / stackHeight; + chunkHeight *= scale; + chunkGap *= scale; + } + const topY = shellMember.group.position.y + height / 2 - 0.04; + const chunkStart = topY - (chunkCount * chunkHeight + (chunkCount - 1) * chunkGap) + chunkHeight / 2; + for (let i = 0; i < chunkCount; i += 1) { + const chunkY = chunkStart + i * (chunkHeight + chunkGap); + const chunkColor = mColor.clone().offsetHSL(0.02 * i, 0.08, 0.08); + chunkInstances.push({ + x: shellMember.group.position.x, + y: chunkY, + z: shellMember.group.position.z, + scaleX: chunkFootprint, + scaleY: chunkHeight, + scaleZ: chunkFootprint, + color: chunkColor + }); + } + }); + } + + if (chunkInstances.length) { + const chunkGeometry = createShapeGeometry('square'); + const chunkOpacity = Math.min(1, Math.max(0.1, visuals.memberOpacity) + 0.1); + const chunkMaterial = createGlassMaterial(new THREE.Color(0xffffff), chunkOpacity); + chunkMaterial.vertexColors = true; + chunkMaterial.userData.glowSpeed = 1.4; + chunkMaterial.userData.glowPhase = -0.6; + const prevCompile = chunkMaterial.onBeforeCompile; + chunkMaterial.onBeforeCompile = (shader) => { + if (typeof prevCompile === 'function') prevCompile(shader); + // Tint the emissive term with the per-instance color so each stack glows in its member's hue. + if (shader.fragmentShader.includes('vColor')) { + shader.fragmentShader = shader.fragmentShader.replace( + '#include <emissivemap_fragment>', + '#include <emissivemap_fragment>\n totalEmissiveRadiance *= vColor;' + ); + } + }; + chunkMaterial.needsUpdate = true; + const chunkMesh = new THREE.InstancedMesh(chunkGeometry, chunkMaterial, chunkInstances.length); + chunkMesh.castShadow = false; + chunkMesh.receiveShadow = false; + const dummy = new THREE.Object3D(); + chunkInstances.forEach((entry, index) => { + dummy.position.set(entry.x, entry.y, entry.z); + dummy.scale.set(entry.scaleX, entry.scaleY, entry.scaleZ); + dummy.updateMatrix(); + chunkMesh.setMatrixAt(index, dummy.matrix); + chunkMesh.setColorAt(index, entry.color); + }); + chunkMesh.instanceMatrix.needsUpdate = true; + if (chunkMesh.instanceColor) chunkMesh.instanceColor.needsUpdate = true; + chunkMesh.userData = { type: 'chunk', opacityOffset: 0.1 }; + memberGroup.add(chunkMesh); + state.chunkMeshes.push(chunkMesh); + } + + if (fileChunkInstances.length) { + const chunkGeometry = createShapeGeometry('square'); + const chunkOpacity = Math.min(1, Math.max(0.1, visuals.fileOpacity) + 0.05); + const chunkMaterial = createGlassMaterial(new THREE.Color(0xffffff), chunkOpacity); + chunkMaterial.vertexColors = true; + chunkMaterial.userData.glowSpeed = 0.6; + chunkMaterial.userData.glowPhase = Math.PI * 0.3; + const prevCompile = chunkMaterial.onBeforeCompile; + chunkMaterial.onBeforeCompile = (shader) => { + if (typeof prevCompile === 'function')
prevCompile(shader); + if (shader.fragmentShader.includes('vColor')) { + shader.fragmentShader = shader.fragmentShader.replace( + '#include <emissivemap_fragment>', + '#include <emissivemap_fragment>\n totalEmissiveRadiance *= vColor;' + ); + } + }; + chunkMaterial.needsUpdate = true; + const chunkMesh = new THREE.InstancedMesh(chunkGeometry, chunkMaterial, fileChunkInstances.length); + const dummy = new THREE.Object3D(); + fileChunkInstances.forEach((entry, index) => { + dummy.position.set(entry.x, entry.y, entry.z); + dummy.scale.set(entry.scaleX, entry.scaleY, entry.scaleZ); + dummy.updateMatrix(); + chunkMesh.setMatrixAt(index, dummy.matrix); + chunkMesh.setColorAt(index, entry.color); + }); + chunkMesh.instanceMatrix.needsUpdate = true; + if (chunkMesh.instanceColor) chunkMesh.instanceColor.needsUpdate = true; + chunkMesh.userData = { type: 'file-chunk', opacityOffset: 0.05 }; + fileGroup.add(chunkMesh); + state.fileChunkMeshes.push(chunkMesh); + } +}; diff --git a/src/map/isometric/client/rebuild.js b/src/map/isometric/client/rebuild.js new file mode 100644 index 000000000..ef7aeca50 --- /dev/null +++ b/src/map/isometric/client/rebuild.js @@ -0,0 +1,224 @@ +import { state } from './state.js'; +import { clearGroup, disposeObject } from './scene-utils.js'; +import { applyHeightFog, updateFog, updateGridGlow, updateFlowLights } from './materials.js'; +import { computeLayout } from './layout.js'; +import { buildMeshes } from './meshes.js'; +import { buildEdges } from './edges.js'; +import { applyHighlights } from './selection.js'; + +const resetScene = () => { + clearGroup(state.fileGroup); + clearGroup(state.memberGroup); + clearGroup(state.labelGroup); + clearGroup(state.edgeGroup); + clearGroup(state.wireGroup); + state.fileMeshes = []; + state.memberMeshes = []; + state.chunkMeshes = []; + state.fileChunkMeshes = []; + state.glowMaterials = []; + state.flowMaterials = []; + state.glassMaterials = []; + state.labelMaterials = []; + state.glassShells = []; + state.wireMaterials = []; + state.gridLineMaterials = []; + state.edgeMeshes = []; + state.edgeSegments = []; + state.edgeDotMesh = null; + state.edgeDotMaterial = null; + state.fileMeshByKey = new Map(); + state.memberMeshById = new Map(); + state.wireByMesh = new Map(); + state.fileAnchors = new Map(); + state.memberAnchors = new Map(); + state.fileColorByPath = new Map(); + state.memberColorById = new Map(); + state.edgeTypeGroups = new Map(); + state.edgeTypes = []; + if (state.flowLights) { + state.flowLights.forEach((light) => state.scene.remove(light)); + } + state.flowLights = []; + if (state.grid) { + state.scene.remove(state.grid); + disposeObject(state.grid); + state.grid = null; + } + if (state.gridLines) { + clearGroup(state.gridLines); + state.scene.remove(state.gridLines); + state.gridLines = null; + } +}; + +export const scheduleRebuild = (delay = 180) => { + if (state.rebuildTimer) { + clearTimeout(state.rebuildTimer); + } + state.rebuildTimer = setTimeout(() => { + state.rebuildTimer = null; + rebuildScene(); + }, delay); +}; + +export const rebuildScene = () => { + if (typeof state.syncStateFromPanel === 'function') { + state.syncStateFromPanel(); + } + const preservedCamera = { + position: state.camera.position.clone(), + zoom: state.camera.zoom + }; + resetScene(); + computeLayout(); + + const { + THREE, + visuals, + LineMaterial, + LineSegments2, + LineSegmentsGeometry, + layoutMetrics, + bounds, + scene, + lineResolution, + lockIsometric, + camera, + controlDefaults, + controls, + renderer + } = state; + + const edgePlane = layoutMetrics.edgePlane; +
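// Size the ground to the layout span plus a 40% margin, rounded up to a multiple of 10 so grid lines land on even steps. +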
const gridSize = Math.max(80, Math.ceil(bounds.maxSpan * 1.4 / 10) * 10); + const groundGeometry = new THREE.PlaneGeometry(gridSize, gridSize); + const groundMaterial = new THREE.MeshStandardMaterial({ + color: 0x151a20, + metalness: 1, + roughness: 0.25, + envMapIntensity: visuals.glass.envMapIntensity * 0.6 + }); + applyHeightFog(groundMaterial); + state.grid = new THREE.Mesh(groundGeometry, groundMaterial); + state.grid.rotation.x = -Math.PI / 2; + state.grid.position.y = edgePlane - 0.05 * state.scaleFactor; + state.grid.receiveShadow = true; + scene.add(state.grid); + state.grid.visible = state.gridVisible; + state.groundPlane.constant = -state.grid.position.y; + + const gridLineStep = Math.max(2, Math.round(layoutMetrics.baseSize)); + const gridHalf = gridSize / 2; + const gridY = state.grid.position.y + 0.02 * state.scaleFactor; + const gridBuckets = [ + { positions: [], phase: 0 }, + { positions: [], phase: 1.8 }, + { positions: [], phase: 3.6 } + ]; + let lineIndex = 0; + for (let x = -gridHalf; x <= gridHalf; x += gridLineStep) { + const bucket = gridBuckets[lineIndex % gridBuckets.length]; + bucket.positions.push(x, gridY, -gridHalf, x, gridY, gridHalf); + lineIndex += 1; + } + for (let z = -gridHalf; z <= gridHalf; z += gridLineStep) { + const bucket = gridBuckets[lineIndex % gridBuckets.length]; + bucket.positions.push(-gridHalf, gridY, z, gridHalf, gridY, z); + lineIndex += 1; + } + const gridLineColor = new THREE.Color('#3b4350'); + state.gridLines = new THREE.Group(); + gridBuckets.forEach((bucket) => { + if (!bucket.positions.length) return; + let gridLineMaterial; + if (LineMaterial && LineSegments2 && LineSegmentsGeometry) { + gridLineMaterial = new LineMaterial({ + color: gridLineColor, + transparent: true, + opacity: visuals.gridGlowBase, + linewidth: visuals.gridLineThickness, + blending: THREE.AdditiveBlending, + depthWrite: false, + depthTest: false + }); + gridLineMaterial.resolution.set(lineResolution.width, lineResolution.height); + } else { + gridLineMaterial = new THREE.LineBasicMaterial({ + color: gridLineColor, + transparent: true, + opacity: visuals.gridGlowBase, + blending: THREE.AdditiveBlending, + depthWrite: false, + depthTest: false + }); + } + gridLineMaterial.userData = { + glowBase: visuals.gridGlowBase, + glowRange: visuals.gridGlowRange, + flowSpeed: visuals.gridPulseSpeed, + flowPhase: bucket.phase + }; + if ('toneMapped' in gridLineMaterial) gridLineMaterial.toneMapped = false; + applyHeightFog(gridLineMaterial); + state.gridLineMaterials.push(gridLineMaterial); + if (LineSegments2 && LineSegmentsGeometry && gridLineMaterial instanceof LineMaterial) { + const gridGeom = new LineSegmentsGeometry(); + gridGeom.setPositions(bucket.positions); + const lineMesh = new LineSegments2(gridGeom, gridLineMaterial); + lineMesh.computeLineDistances(); + state.gridLines.add(lineMesh); + } else { + const gridGeom = new THREE.BufferGeometry(); + gridGeom.setAttribute('position', new THREE.Float32BufferAttribute(bucket.positions, 3)); + state.gridLines.add(new THREE.LineSegments(gridGeom, gridLineMaterial)); + } + }); + state.gridLines.renderOrder = 1; + state.gridLines.visible = state.gridVisible; + scene.add(state.gridLines); + updateGridGlow(); + updateFog(bounds.maxSpan); + + const targetCameraBase = Math.max(40, bounds.maxSpan * 0.6); + const cameraDistance = Math.max(60, bounds.maxSpan * 1.2); + if (!state.cameraInitialized) { + state.cameraBase = targetCameraBase; + } + state.farPlane = Math.max(5000, bounds.maxSpan * 10); + state.nearPlane = Math.max(0.1, 
state.farPlane / 100000); + const viewport = typeof state.getViewport === 'function' + ? state.getViewport() + : { width: 1, height: 1 }; + const aspect = viewport.height ? viewport.width / viewport.height : 1; + camera.left = -state.cameraBase * aspect; + camera.right = state.cameraBase * aspect; + camera.top = state.cameraBase; + camera.bottom = -state.cameraBase; + camera.near = state.nearPlane; + camera.far = state.farPlane; + const zoomMin = Number.isFinite(controls.zoomMin) ? controls.zoomMin : controlDefaults.zoomMin; + const zoomMax = Number.isFinite(controls.zoomMax) ? controls.zoomMax : controlDefaults.zoomMax; + if (!state.cameraInitialized) { + camera.position.set(cameraDistance, cameraDistance * 0.9, cameraDistance); + lockIsometric(); + state.cameraInitialized = true; + } else { + camera.position.copy(preservedCamera.position); + lockIsometric(); + } + camera.zoom = Math.max(zoomMin, Math.min(zoomMax, preservedCamera.zoom || camera.zoom)); + camera.updateProjectionMatrix(); + lockIsometric(); + + buildMeshes(); + buildEdges(); + updateFlowLights(); + if (typeof state.renderEdgeMenu === 'function') { + state.renderEdgeMenu(); + } + applyHighlights(); + if (renderer?.shadowMap) { + renderer.shadowMap.needsUpdate = true; + } +}; diff --git a/src/map/isometric/client/scene-utils.js b/src/map/isometric/client/scene-utils.js new file mode 100644 index 000000000..316ddee15 --- /dev/null +++ b/src/map/isometric/client/scene-utils.js @@ -0,0 +1,44 @@ +export const disposeMaterial = (material) => { + if (!material) return; + if (Array.isArray(material)) { + material.forEach((entry) => disposeMaterial(entry)); + return; + } + if (material.map) material.map.dispose?.(); + if (material.normalMap) material.normalMap.dispose?.(); + if (material.clearcoatNormalMap) material.clearcoatNormalMap.dispose?.(); + material.dispose?.(); +}; + +export const disposeObject = (object) => { + if (!object) return; + if (object.geometry && !object.geometry.userData?.shared) object.geometry.dispose(); + if (object.material) disposeMaterial(object.material); +}; + +export const clearGroup = (group) => { + if (!group) return; + const disposedGeometries = new Set(); + const disposedMaterials = new Set(); + group.traverse((child) => { + if (child === group) return; + const geometry = child.geometry; + if (geometry && !geometry.userData?.shared && !disposedGeometries.has(geometry)) { + disposedGeometries.add(geometry); + geometry.dispose?.(); + } + const material = child.material; + if (material) { + const materials = Array.isArray(material) ? 
material : [material]; + for (const entry of materials) { + if (entry && !disposedMaterials.has(entry)) { + disposedMaterials.add(entry); + disposeMaterial(entry); + } + } + } + }); + while (group.children.length) { + group.remove(group.children[0]); + } +}; diff --git a/src/map/isometric/client/scene.js b/src/map/isometric/client/scene.js new file mode 100644 index 000000000..429fbd85f --- /dev/null +++ b/src/map/isometric/client/scene.js @@ -0,0 +1,146 @@ +import { state } from './state.js'; + +export const initScene = async () => { + const { THREE, dom, RGBELoader, assets, visuals } = state; + const { app } = dom; + + const getViewport = () => { + const rect = app.getBoundingClientRect(); + const width = rect.width || window.innerWidth; + const height = rect.height || window.innerHeight; + return { width, height }; + }; + + const renderer = new THREE.WebGLRenderer({ antialias: true, alpha: true }); + renderer.setPixelRatio(Math.min(2, window.devicePixelRatio || 1)); + const initialViewport = getViewport(); + const lineResolution = { width: initialViewport.width, height: initialViewport.height }; + renderer.setSize(initialViewport.width, initialViewport.height); + renderer.domElement.style.width = '100%'; + renderer.domElement.style.height = '100%'; + renderer.physicallyCorrectLights = true; + renderer.toneMapping = THREE.ACESFilmicToneMapping; + renderer.toneMappingExposure = 1.9; + renderer.shadowMap.enabled = true; + renderer.shadowMap.type = THREE.PCFSoftShadowMap; + renderer.shadowMap.autoUpdate = false; + if (renderer.outputColorSpace !== undefined) { + renderer.outputColorSpace = THREE.SRGBColorSpace; + } + app.appendChild(renderer.domElement); + + const scene = new THREE.Scene(); + scene.background = new THREE.Color('#0f1115'); + + const ambient = new THREE.AmbientLight(0xffffff, 0.9); + scene.add(ambient); + const dirLight = new THREE.DirectionalLight(0xffffff, 1.2); + dirLight.position.set(50, 80, 30); + dirLight.castShadow = true; + scene.add(dirLight); + const hemiLight = new THREE.HemisphereLight(0x6fb1ff, 0x2b2f3a, 0.8); + scene.add(hemiLight); + const fillLight = new THREE.PointLight(0x9fd3ff, 1.0, 260); + fillLight.position.set(-40, 35, -20); + scene.add(fillLight); + const rimLight = new THREE.DirectionalLight(0x6fb1ff, 1.4); + rimLight.position.set(-80, 60, 80); + const accentLight = new THREE.PointLight(0xffe6b5, 1.2, 220); + accentLight.position.set(40, 50, -70); + const extraLights = [rimLight, accentLight]; + extraLights.forEach((light) => scene.add(light)); + + const fileGroup = new THREE.Group(); + const memberGroup = new THREE.Group(); + const labelGroup = new THREE.Group(); + const wireGroup = new THREE.Group(); + const edgeGroup = new THREE.Group(); + scene.add(fileGroup); + scene.add(memberGroup); + scene.add(labelGroup); + scene.add(wireGroup); + scene.add(edgeGroup); + edgeGroup.renderOrder = 1; + wireGroup.renderOrder = 5; + labelGroup.renderOrder = 4; + fileGroup.renderOrder = 2; + memberGroup.renderOrder = 3; + labelGroup.visible = false; + + let cameraBase = 40; + let nearPlane = 0.1; + let farPlane = 2000; + const camera = new THREE.OrthographicCamera(-cameraBase, cameraBase, cameraBase, -cameraBase, nearPlane, farPlane); + camera.matrixAutoUpdate = true; + const isoYaw = Math.PI / 4; + const isoPitch = -Math.atan(1 / Math.sqrt(2)); + const isoEuler = new THREE.Euler(isoPitch, isoYaw, 0, 'YXZ'); + const isoQuaternion = new THREE.Quaternion().setFromEuler(isoEuler); + const isoUp = new THREE.Vector3(0, 1, 0); + camera.position.set(60, 54, 60); + 
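// True isometric orientation: yaw PI/4, pitch -atan(1/sqrt(2)); lockIsometric() below restores it after the camera is repositioned. +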
camera.quaternion.copy(isoQuaternion); + camera.up.copy(isoUp); + const lockIsometric = () => { + camera.up.copy(isoUp); + camera.quaternion.copy(isoQuaternion); + camera.updateMatrixWorld(); + }; + + const applyEnvironment = (texture) => { + if (!texture) return; + texture.mapping = THREE.EquirectangularReflectionMapping; + const pmrem = new THREE.PMREMGenerator(renderer); + scene.environment = pmrem.fromEquirectangular(texture).texture; + pmrem.dispose(); + }; + + const envCanvas = document.createElement('canvas'); + envCanvas.width = 32; + envCanvas.height = 16; + const envCtx = envCanvas.getContext('2d'); + const gradient = envCtx.createLinearGradient(0, 0, envCanvas.width, envCanvas.height); + gradient.addColorStop(0, '#1b2230'); + gradient.addColorStop(0.5, '#6fb1ff'); + gradient.addColorStop(1, '#0f1115'); + envCtx.fillStyle = gradient; + envCtx.fillRect(0, 0, envCanvas.width, envCanvas.height); + const fallbackEnv = new THREE.CanvasTexture(envCanvas); + applyEnvironment(fallbackEnv); + fallbackEnv.dispose(); + + if (RGBELoader && assets.hdrEnvUrl) { + const rgbe = new RGBELoader(); + rgbe.load(assets.hdrEnvUrl, (hdrTexture) => { + applyEnvironment(hdrTexture); + hdrTexture.dispose(); + }); + } + + Object.assign(state, { + renderer, + scene, + camera, + lineResolution, + getViewport, + lockIsometric, + cameraBase, + nearPlane, + farPlane, + cameraInitialized: false, + extraLights, + fileGroup, + memberGroup, + labelGroup, + wireGroup, + edgeGroup, + grid: null, + gridLines: null, + groundPlane: new THREE.Plane(new THREE.Vector3(0, 1, 0), 0), + fogBounds: { maxSpan: 120 }, + scaleFactor: 2 + }); + + if (visuals?.enableExtraLights === false) { + extraLights.forEach((light) => { light.visible = false; }); + } +}; diff --git a/src/map/isometric/client/selection.js b/src/map/isometric/client/selection.js new file mode 100644 index 000000000..07d447987 --- /dev/null +++ b/src/map/isometric/client/selection.js @@ -0,0 +1,492 @@ +import { state } from './state.js'; +import { clamp } from './utils.js'; +import { configureWireMaterial } from './materials.js'; + +const formatPrimitive = (value) => { + if (value === null || value === undefined || value === '') return 'None'; + if (typeof value === 'boolean') return value ? 'true' : 'false'; + if (typeof value === 'number') return Number.isFinite(value) ? 
value.toString() : 'None'; + return String(value); +}; + +const isRefItem = (value) => value && typeof value === 'object' && value.__ref; + +export const setHoverRef = (ref) => { + state.hoveredRef = ref; + applyHighlights(); +}; + +const renderValueNode = (value) => { + if (value === null || value === undefined || value === '') { + const empty = document.createElement('span'); + empty.className = 'sel-empty'; + empty.textContent = 'None'; + return empty; + } + if (Array.isArray(value)) { + if (!value.length) { + const empty = document.createElement('span'); + empty.className = 'sel-empty'; + empty.textContent = 'None'; + return empty; + } + const list = document.createElement('div'); + list.className = 'sel-list'; + value.forEach((entry) => { + const pill = document.createElement('span'); + pill.className = 'sel-pill'; + if (isRefItem(entry)) { + pill.textContent = entry.label; + pill.dataset.refType = entry.refType; + pill.dataset.refId = entry.refId; + pill.addEventListener('mouseenter', () => setHoverRef(entry)); + pill.addEventListener('mouseleave', () => setHoverRef(null)); + } else { + pill.textContent = formatPrimitive(entry); + } + list.appendChild(pill); + }); + return list; + } + if (typeof value === 'object') { + if (isRefItem(value)) { + const pill = document.createElement('span'); + pill.className = 'sel-pill'; + pill.textContent = value.label; + pill.dataset.refType = value.refType; + pill.dataset.refId = value.refId; + pill.addEventListener('mouseenter', () => setHoverRef(value)); + pill.addEventListener('mouseleave', () => setHoverRef(null)); + return pill; + } + const entries = Object.entries(value); + if (!entries.length) { + const empty = document.createElement('span'); + empty.className = 'sel-empty'; + empty.textContent = 'None'; + return empty; + } + const list = document.createElement('div'); + list.className = 'sel-list'; + entries.forEach(([key, entry]) => { + const pill = document.createElement('span'); + pill.className = 'sel-pill'; + pill.textContent = `${key}: ${formatPrimitive(entry)}`; + list.appendChild(pill); + }); + return list; + } + const text = document.createElement('span'); + text.textContent = formatPrimitive(value); + return text; +}; + +const createSelectionSection = (title) => { + const { dom } = state; + const section = document.createElement('div'); + section.className = 'sel-section'; + const heading = document.createElement('div'); + heading.className = 'sel-title'; + heading.textContent = title; + section.appendChild(heading); + dom.selectionBody.appendChild(section); + return section; +}; + +const addSelectionRow = (section, label, value) => { + const row = document.createElement('div'); + row.className = 'sel-row'; + const labelNode = document.createElement('div'); + labelNode.className = 'sel-label'; + labelNode.textContent = label; + const valueNode = document.createElement('div'); + valueNode.className = 'sel-value'; + valueNode.appendChild(renderValueNode(value)); + row.appendChild(labelNode); + row.appendChild(valueNode); + section.appendChild(row); +}; + +const formatRange = (range) => { + if (!range || !Number.isFinite(range.startLine)) return 'None'; + const start = range.startLine; + const end = Number.isFinite(range.endLine) ? 
range.endLine : start; + const span = Math.max(1, end - start + 1); + return `${start}-${end} (${span} lines)`; +}; + +const formatEdgeCounts = (edgeList) => { + if (!edgeList.length) return []; + const counts = new Map(); + edgeList.forEach((edge) => { + const type = edge.type || 'other'; + counts.set(type, (counts.get(type) || 0) + 1); + }); + return Array.from(counts.entries()) + .sort((a, b) => (b[1] - a[1]) || a[0].localeCompare(b[0])) + .map(([type, count]) => `${type}: ${count}`); +}; + +const formatEdgeTargets = (edgeList, direction, limit = 8) => { + if (!edgeList.length) return []; + const seen = new Set(); + const targets = []; + edgeList.forEach((edge) => { + const endpoint = direction === 'incoming' ? edge.from : edge.to; + if (!endpoint) return; + let label = ''; + let refType = ''; + let refId = ''; + if (endpoint.member) { + const member = state.memberById.get(endpoint.member); + if (member) { + label = `${member.name || endpoint.member} - ${member.file || ''}`.trim(); + refType = 'member'; + refId = member.id || endpoint.member; + } else { + label = endpoint.member; + refType = 'member'; + refId = endpoint.member; + } + } else if (endpoint.file) { + label = endpoint.file; + refType = 'file'; + refId = endpoint.file; + } + if (!label || seen.has(label)) return; + seen.add(label); + targets.push({ __ref: true, label, refType, refId }); + }); + if (targets.length > limit) { + const trimmed = targets.slice(0, limit); + trimmed.push(`+${targets.length - limit} more`); + return trimmed; + } + return targets; +}; + +const formatListWithLimit = (values, limit = 10) => { + if (!Array.isArray(values) || !values.length) return []; + if (values.length > limit) { + return values.slice(0, limit).concat(`+${values.length - limit} more`); + } + return values; +}; + +const collectEdgesForSelection = (selectionInfo, member, node) => { + if (!selectionInfo) return { incoming: [], outgoing: [] }; + const memberId = member?.id || selectionInfo.id || null; + const fileKey = selectionInfo.file || node?.path || node?.name || ''; + const incoming = []; + const outgoing = []; + state.edges.forEach((edge) => { + const from = edge.from || {}; + const to = edge.to || {}; + const fromMatch = memberId + ? from.member === memberId + : (from.file === fileKey || state.fileByMember.get(from.member) === fileKey); + const toMatch = memberId + ? to.member === memberId + : (to.file === fileKey || state.fileByMember.get(to.member) === fileKey); + if (fromMatch) outgoing.push(edge); + if (toMatch) incoming.push(edge); + }); + return { incoming, outgoing }; +}; + +export const renderSelectionDetails = (info) => { + const { dom, nodeByPath, nodeById, memberById, memberByKey, buildMemberKey, buildMemberNameKey } = state; + dom.selectionBody.textContent = ''; + if (!info) { + dom.selectionBody.textContent = 'None'; + return; + } + const fileKey = info.file || info.name || ''; + const node = nodeByPath.get(fileKey) || nodeById.get(info.id) || null; + const rangeKey = buildMemberKey(fileKey, info.name || '', info.range || {}); + const nameKey = buildMemberNameKey(fileKey, info.name || ''); + const member = info.id + ? 
memberById.get(info.id) + : (memberByKey.get(rangeKey) || memberByKey.get(nameKey) || null); + + if (info.type === 'file' || (!info.type && node)) { + const section = createSelectionSection('File'); + addSelectionRow(section, 'Name', node?.name || info.name || fileKey); + addSelectionRow(section, 'Path', node?.path || fileKey); + addSelectionRow(section, 'Category', node?.category || 'None'); + addSelectionRow(section, 'Type', node?.type || 'file'); + addSelectionRow(section, 'Ext', node?.ext || 'None'); + addSelectionRow(section, 'Id', node?.id || 'None'); + const members = Array.isArray(node?.members) ? node.members : []; + addSelectionRow(section, 'Members', members.length); + if (members.length) { + const memberNames = members.map((entry) => entry.name).filter(Boolean); + addSelectionRow(section, 'Member names', formatListWithLimit(memberNames, 10)); + } + } + + if (info.type === 'member' || member) { + const section = createSelectionSection('Member'); + addSelectionRow(section, 'Name', member?.name || info.name || 'None'); + addSelectionRow(section, 'File', member?.file || fileKey || 'None'); + addSelectionRow(section, 'Type', member?.type || info.type || 'None'); + addSelectionRow(section, 'Kind', member?.kind || 'None'); + addSelectionRow(section, 'Signature', member?.signature || 'None'); + addSelectionRow(section, 'Params', member?.params || null); + addSelectionRow(section, 'Returns', member?.returns || 'None'); + addSelectionRow(section, 'Modifiers', member?.modifiers || 'None'); + addSelectionRow(section, 'Exported', member?.exported ?? false); + addSelectionRow(section, 'Range', formatRange(member?.range || info.range)); + addSelectionRow(section, 'Id', member?.id || info.id || 'None'); + addSelectionRow(section, 'Port', member?.port || 'None'); + + const dataflow = member?.dataflow || {}; + const dataSection = createSelectionSection('Dataflow'); + addSelectionRow(dataSection, 'Reads', dataflow.reads || null); + addSelectionRow(dataSection, 'Writes', dataflow.writes || null); + addSelectionRow(dataSection, 'Mutations', dataflow.mutations || null); + addSelectionRow(dataSection, 'Aliases', dataflow.aliases || null); + + const controlFlow = member?.controlFlow || {}; + const controlSection = createSelectionSection('Control flow'); + addSelectionRow(controlSection, 'Branches', controlFlow.branches ?? 0); + addSelectionRow(controlSection, 'Loops', controlFlow.loops ?? 0); + addSelectionRow(controlSection, 'Returns', controlFlow.returns ?? 0); + addSelectionRow(controlSection, 'Breaks', controlFlow.breaks ?? 0); + addSelectionRow(controlSection, 'Continues', controlFlow.continues ?? 0); + addSelectionRow(controlSection, 'Throws', controlFlow.throws ?? 0); + addSelectionRow(controlSection, 'Awaits', controlFlow.awaits ?? 0); + addSelectionRow(controlSection, 'Yields', controlFlow.yields ?? 
0); + } + + const edgeSection = createSelectionSection('Edges'); + const edgeData = collectEdgesForSelection(info, member, node); + addSelectionRow(edgeSection, 'Incoming', formatEdgeCounts(edgeData.incoming)); + addSelectionRow(edgeSection, 'Outgoing', formatEdgeCounts(edgeData.outgoing)); + addSelectionRow(edgeSection, 'From', formatEdgeTargets(edgeData.incoming, 'incoming')); + addSelectionRow(edgeSection, 'To', formatEdgeTargets(edgeData.outgoing, 'outgoing')); +}; + +const resetMaterialHighlight = (material) => { + if (!material || !material.userData?.baseEmissive) return; + material.emissive.copy(material.userData.baseEmissive); + material.emissiveIntensity = material.userData.baseEmissiveIntensity ?? material.emissiveIntensity; + if (material.userData.baseOpacity != null) material.opacity = material.userData.baseOpacity; + material.needsUpdate = true; +}; + +const resetObjectHighlights = () => { + for (const mesh of [...state.fileMeshes, ...state.memberMeshes, ...state.chunkMeshes]) { + resetMaterialHighlight(mesh.material); + const inner = mesh.userData?.shellInner; + if (inner?.material) resetMaterialHighlight(inner.material); + } +}; + +const resetEdgeHighlights = () => { + for (const mesh of state.edgeMeshes) { + const material = mesh.material; + if (!material) continue; + if (mesh.isInstancedMesh) { + const baseColors = mesh.userData?.instanceBaseColors; + if (Array.isArray(baseColors)) { + baseColors.forEach((color, index) => { + if (color) mesh.setColorAt(index, color); + }); + if (mesh.instanceColor) mesh.instanceColor.needsUpdate = true; + } + if (material.userData?.baseEmissiveIntensity != null) { + material.emissiveIntensity = material.userData.baseEmissiveIntensity; + } + if (material.userData?.baseOpacity != null) { + material.opacity = material.userData.baseOpacity; + } + material.needsUpdate = true; + continue; + } + if (!material.userData?.baseColor) continue; + material.color.copy(material.userData.baseColor); + material.emissive.copy(material.userData.baseEmissive); + material.emissiveIntensity = material.userData.baseEmissiveIntensity ?? material.emissiveIntensity; + material.opacity = material.userData.baseOpacity ?? 
material.opacity; + material.needsUpdate = true; + } +}; + +const resetWireHighlights = () => { + for (const material of state.wireMaterials) { + configureWireMaterial(material); + material.needsUpdate = true; + } +}; + +const boostWireframe = (mesh, color, strength) => { + if (!mesh) return; + const wire = state.wireByMesh.get(mesh); + if (!wire || !wire.material) return; + const material = wire.material; + const baseWidth = material.userData?.baseLinewidth || material.linewidth || 1; + if ('linewidth' in material) { + material.linewidth = baseWidth * (1 + strength); + } + if (color) material.color.copy(color); + material.opacity = clamp(material.opacity + strength * 0.2, 0.02, 0.9); + material.needsUpdate = true; +}; + +const highlightMesh = (mesh, color, intensity, wireBoost = 0) => { + if (!mesh || !mesh.material) return; + mesh.material.emissive.copy(color); + mesh.material.emissiveIntensity = intensity; + mesh.material.needsUpdate = true; + const inner = mesh.userData?.shellInner; + if (inner?.material) { + inner.material.emissive.copy(color); + inner.material.emissiveIntensity = intensity * 0.75; + inner.material.needsUpdate = true; + } + if (wireBoost > 0) boostWireframe(mesh, color, wireBoost); +}; + +const highlightEdgeMesh = (mesh, color) => { + if (!mesh || !mesh.material) return; + mesh.material.color.copy(color); + mesh.material.emissive.copy(color); + mesh.material.emissiveIntensity = Math.max(0.6, mesh.material.userData?.baseEmissiveIntensity || 0.6); + mesh.material.opacity = Math.max(0.7, mesh.material.opacity); + mesh.material.needsUpdate = true; +}; + +const highlightEdgeInstance = (mesh, index, color) => { + if (!mesh || !mesh.isInstancedMesh) return; + if (typeof mesh.setColorAt === 'function') { + mesh.setColorAt(index, color); + if (mesh.instanceColor) mesh.instanceColor.needsUpdate = true; + } +}; + +const buildSelectionKeys = (info) => { + const keys = new Set(); + if (!info) return keys; + const fileKey = info.file || info.name || ''; + if (fileKey) keys.add(`file:${fileKey}`); + const memberId = info.id || info.memberId || null; + if (memberId) { + keys.add(`member:${memberId}`); + const memberFile = state.fileByMember.get(memberId); + if (memberFile) keys.add(`file:${memberFile}`); + } + return keys; +}; + +const applyHighlightsForKeys = (selectionKeys, intensity = 1) => { + if (!selectionKeys || !selectionKeys.size) return; + const connected = new Map(); + const edgeSegments = state.edgeSegments || []; + edgeSegments.forEach((segment) => { + const endpoints = segment.endpoints; + if (!endpoints || !endpoints.size) return; + let matches = false; + for (const key of selectionKeys) { + if (endpoints.has(key)) { + matches = true; + break; + } + } + if (!matches) return; + const edgeColor = segment.edgeColor || new state.THREE.Color(0xffffff); + const highlightColor = segment.highlightColor || edgeColor; + highlightEdgeInstance(segment.mesh, segment.index, highlightColor); + endpoints.forEach((endpointKey) => { + if (selectionKeys.has(endpointKey)) return; + const entry = connected.get(endpointKey) || { color: new state.THREE.Color(0, 0, 0), weight: 0 }; + entry.color.add(edgeColor.clone().multiplyScalar(1)); + entry.weight += 1; + connected.set(endpointKey, entry); + }); + }); + + connected.forEach((entry, endpointKey) => { + if (!entry.weight) return; + const color = entry.color.multiplyScalar(1 / entry.weight); + const [type, id] = endpointKey.split(':'); + if (type === 'file' && state.fileMeshByKey.has(id)) { + highlightMesh(state.fileMeshByKey.get(id), color, 
0.35 * intensity + 0.15, 0.25 * intensity); + } + if (type === 'member' && state.memberMeshById.has(id)) { + highlightMesh(state.memberMeshById.get(id), color, 0.35 * intensity + 0.15, 0.25 * intensity); + } + }); +}; + +export const applyHighlights = () => { + resetObjectHighlights(); + resetEdgeHighlights(); + resetWireHighlights(); + const selectionKeys = buildSelectionKeys(state.selected?.userData || null); + if (state.selected) { + const baseColor = state.selected.userData?.baseColor + ? state.selected.userData.baseColor + : (state.selected.material?.color ? state.selected.material.color : new state.THREE.Color(0xffffff)); + highlightMesh(state.selected, baseColor.clone().lerp(new state.THREE.Color(0xffffff), 0.35), 0.7, 0.85); + applyHighlightsForKeys(selectionKeys, 1); + } + if (state.hoveredRef) { + const hoverInfo = state.hoveredRef.refType === 'member' + ? { id: state.hoveredRef.refId, memberId: state.hoveredRef.refId } + : { file: state.hoveredRef.refId, name: state.hoveredRef.refId }; + const hoverKeys = buildSelectionKeys(hoverInfo); + applyHighlightsForKeys(hoverKeys, 0.6); + if (state.hoveredRef.refType === 'file' && state.fileMeshByKey.has(state.hoveredRef.refId)) { + highlightMesh(state.fileMeshByKey.get(state.hoveredRef.refId), new state.THREE.Color(0xffffff), 0.35, 0.35); + } + if (state.hoveredRef.refType === 'member' && state.memberMeshById.has(state.hoveredRef.refId)) { + highlightMesh(state.memberMeshById.get(state.hoveredRef.refId), new state.THREE.Color(0xffffff), 0.35, 0.35); + } + } + if (state.hoveredMesh && !state.selected) { + const baseColor = state.hoveredMesh.userData?.baseColor + ? state.hoveredMesh.userData.baseColor.clone().lerp(new state.THREE.Color(0xffffff), 0.25) + : new state.THREE.Color(0xffffff); + highlightMesh(state.hoveredMesh, baseColor, 0.35, 0.4); + } +}; + +export const setSelection = (object) => { + state.selected = object; + const info = state.selected ? 
(state.selected.userData || {}) : null; + renderSelectionDetails(info); + applyHighlights(); +}; + +const resolveFilePath = (file) => { + if (!file) return ''; + if (file.includes(':\\') || file.startsWith('\\') || file.startsWith('/')) return file; + const root = state.map.root?.path || ''; + if (!root) return file; + if (root.endsWith('/') || root.endsWith('\\')) return root + file; + return root + '/' + file; +}; + +const buildOpenUri = (info) => { + if (!state.config.openUriTemplate) return null; + const range = info.range || {}; + const filePath = resolveFilePath(info.file || ''); + const replacements = { + file: encodeURIComponent(filePath), + fileRaw: filePath, + line: encodeURIComponent(range.startLine || 1), + column: encodeURIComponent(1), + startLine: encodeURIComponent(range.startLine || 1), + endLine: encodeURIComponent(range.endLine || range.startLine || 1), + symbol: encodeURIComponent(info.name || '') + }; + return state.config.openUriTemplate.replace(/{(\w+)}/g, (match, key) => replacements[key] || match); +}; + +export const openSelection = () => { + if (!state.selected) return; + const uri = buildOpenUri(state.selected.userData || {}); + if (uri) window.location.href = uri; +}; + diff --git a/src/map/isometric/client/state.js b/src/map/isometric/client/state.js new file mode 100644 index 000000000..1979e5457 --- /dev/null +++ b/src/map/isometric/client/state.js @@ -0,0 +1 @@ +export const state = {}; diff --git a/src/map/isometric/client/three-loader.js b/src/map/isometric/client/three-loader.js new file mode 100644 index 000000000..29569cf18 --- /dev/null +++ b/src/map/isometric/client/three-loader.js @@ -0,0 +1,25 @@ +export const loadThreeModules = async (threeUrl) => { + const THREE = await import(threeUrl); + let LineSegments2 = null; + let LineSegmentsGeometry = null; + let LineMaterial = null; + try { + ({ LineSegments2 } = await import('/three/examples/jsm/lines/LineSegments2.js')); + ({ LineSegmentsGeometry } = await import('/three/examples/jsm/lines/LineSegmentsGeometry.js')); + ({ LineMaterial } = await import('/three/examples/jsm/lines/LineMaterial.js')); + } catch (err) { + LineSegments2 = null; + LineSegmentsGeometry = null; + LineMaterial = null; + } + return { THREE, LineSegments2, LineSegmentsGeometry, LineMaterial }; +}; + +export const loadRgbeLoader = async (url) => { + try { + const module = await import(url || '/three/examples/jsm/loaders/RGBELoader.js'); + return module.RGBELoader || null; + } catch (err) { + return null; + } +}; diff --git a/src/map/isometric/client/ui.js b/src/map/isometric/client/ui.js new file mode 100644 index 000000000..6caa8c5ed --- /dev/null +++ b/src/map/isometric/client/ui.js @@ -0,0 +1,988 @@ +import { state } from './state.js'; +import { storageKey } from './dom.js'; +import { + applyGlassSettings, + updateExtraLights, + updateFileOpacity, + updateFlowGlow, + updateFlowLights, + updateFog, + updateGridGlow, + updateMemberOpacity, + updateWireframes +} from './materials.js'; +import { scheduleRebuild } from './rebuild.js'; +import { renderSelectionDetails } from './selection.js'; +import { clearGroup } from './scene-utils.js'; + +const getNested = (obj, path) => { + const parts = path.split('.'); + let current = obj; + for (const part of parts) { + if (!current || typeof current !== 'object') return undefined; + current = current[part]; + } + return current; +}; + +const setNested = (obj, path, value) => { + const parts = path.split('.'); + let current = obj; + while (parts.length > 1) { + const part = parts.shift(); + 
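// Walk the dotted path, creating intermediate objects on demand (e.g. 'visuals.curveEdges' touches panelState.visuals first). +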
current[part] = current[part] || {}; + current = current[part]; + } + current[parts[0]] = value; +}; + +const createToggle = (container, options) => { + const wrapper = document.createElement('label'); + const input = document.createElement('input'); + input.type = 'checkbox'; + input.checked = options.checked !== false; + input.addEventListener('change', () => options.onChange(input.checked)); + wrapper.appendChild(input); + if (options.swatch) wrapper.appendChild(options.swatch); + const text = document.createElement('span'); + text.textContent = options.label; + wrapper.appendChild(text); + container.appendChild(wrapper); +}; + +const createSelect = (container, options) => { + const wrapper = document.createElement('label'); + const text = document.createElement('span'); + text.textContent = options.label; + const select = document.createElement('select'); + select.style.flex = '1'; + options.options.forEach((entry) => { + const option = document.createElement('option'); + option.value = entry.value; + option.textContent = entry.label; + select.appendChild(option); + }); + select.value = options.value ?? options.defaultValue; + select.addEventListener('change', () => { + options.onChange(select.value); + }); + wrapper.appendChild(text); + wrapper.appendChild(select); + container.appendChild(wrapper); +}; + +const createSlider = (container, options) => { + const label = document.createElement('div'); + label.textContent = options.label; + const row = document.createElement('div'); + row.className = 'slider-row'; + const input = document.createElement('input'); + input.type = 'range'; + input.min = String(options.min); + input.max = String(options.max); + input.step = String(options.step || 0.1); + const currentValue = getNested(state.panelState, options.path); + input.value = String(Number.isFinite(currentValue) ? currentValue : options.defaultValue); + const valueLabel = document.createElement('div'); + valueLabel.className = 'value'; + const updateValue = () => { + const raw = Number(input.value); + const nextValue = Number.isFinite(raw) ? raw : options.defaultValue; + setNested(state.panelState, options.path, nextValue); + valueLabel.textContent = options.format ? 
options.format(nextValue) : String(nextValue); + syncStateFromPanel(); + if (typeof options.onInput === 'function') { + options.onInput(nextValue); + } + if (options.rebuild !== false) { + scheduleRebuild(options.debounceMs); + } + persistPanelState(); + }; + input.addEventListener('input', updateValue); + updateValue(); + row.appendChild(input); + row.appendChild(valueLabel); + container.appendChild(label); + container.appendChild(row); +}; + +const createButton = (container, label, onClick) => { + const button = document.createElement('button'); + button.type = 'button'; + button.textContent = label; + button.addEventListener('click', onClick); + container.appendChild(button); +}; + +const persistPanelState = (() => { + let timer = null; + return () => { + if (timer) clearTimeout(timer); + timer = setTimeout(() => { + const payload = { + layout: state.panelState.layout, + scoring: state.panelState.scoring, + colors: state.panelState.colors, + controls: state.panelState.controls, + visuals: state.panelState.visuals + }; + try { + window.localStorage.setItem(storageKey, JSON.stringify(payload)); + } catch (err) { + // ignore storage failures + } + }, 200); + }; +})(); + +export const syncStateFromPanel = () => { + Object.assign(state.layout, state.panelState.layout || {}); + Object.assign(state.scoring, state.panelState.scoring || {}); + Object.assign(state.colors, state.panelState.colors || {}); + Object.assign(state.controls, state.panelState.controls || {}); + state.controls.wasd = { ...state.controls.wasd, ...(state.panelState.controls?.wasd || {}) }; + Object.assign(state.visuals, state.panelState.visuals || {}); + state.visuals.glass = { ...state.visuals.glass, ...(state.panelState.visuals?.glass || {}) }; + if (state.normalMapState?.texture) { + state.normalMapState.texture.repeat.set(state.visuals.glass.normalRepeat, state.visuals.glass.normalRepeat); + } + updateExtraLights(); +}; + +export const renderEdgeMenu = () => { + const { dom, edgeTypes, edgeVisibility, edgeTypeGroups, visuals, visualDefaults } = state; + dom.menuEdges.textContent = ''; + createToggle(dom.menuEdges, { + label: 'Curve edges', + checked: visuals.curveEdges ?? visualDefaults.curveEdges, + onChange: (value) => { + setNested(state.panelState, 'visuals.curveEdges', value); + syncStateFromPanel(); + scheduleRebuild(); + persistPanelState(); + } + }); + if (!edgeTypes.length) { + const empty = document.createElement('div'); + empty.textContent = 'No edges available'; + dom.menuEdges.appendChild(empty); + return; + } + edgeTypes.forEach((type) => { + const style = state.map.legend?.edgeStyles?.[type] || {}; + const swatch = document.createElement('span'); + swatch.className = 'swatch'; + swatch.style.background = style.color || '#9aa0a6'; + createToggle(dom.menuEdges, { + label: type, + swatch, + checked: edgeVisibility.has(type) + ? 
edgeVisibility.get(type) + : edgeTypeGroups.get(type)?.visible !== false, + onChange: (value) => { + const group = edgeTypeGroups.get(type); + if (group) group.visible = value; + edgeVisibility.set(type, value); + } + }); + }); +}; + +export const initUi = () => { + const { + dom, + layout, + scoring, + controls, + visuals, + controlDefaults, + layoutDefaults, + scoringDefaults, + colorDefaults, + visualDefaults, + colors, + fileGroup, + memberGroup, + labelGroup, + wireGroup, + edgeGroup + } = state; + + state.panelState = { + layout: { ...layout }, + scoring: { ...scoring }, + colors: { ...colors }, + controls: { ...controls, wasd: { ...(controls.wasd || {}) } }, + visuals: { ...visuals, glass: { ...visuals.glass } } + }; + + state.edgeVisibility = state.edgeVisibility || new Map(); + state.gridVisible = state.gridVisible ?? true; + + createToggle(dom.menuView, { + label: 'Grid', + onChange: (value) => { + state.gridVisible = value; + if (state.grid) state.grid.visible = value; + if (state.gridLines) state.gridLines.visible = value; + } + }); + createToggle(dom.menuView, { + label: 'Files', + onChange: (value) => { + fileGroup.visible = value; + } + }); + createToggle(dom.menuView, { + label: 'Members', + onChange: (value) => { + memberGroup.visible = value; + } + }); + createToggle(dom.menuView, { + label: 'Labels', + checked: false, + onChange: (value) => { + labelGroup.visible = value; + if (value) { + scheduleRebuild(0); + } else { + clearGroup(labelGroup); + state.labelMaterials = []; + } + } + }); + createToggle(dom.menuView, { + label: 'Wireframes', + onChange: (value) => { + wireGroup.visible = value; + } + }); + createToggle(dom.menuView, { + label: 'Edges', + onChange: (value) => { + edgeGroup.visible = value; + } + }); + + createSlider(dom.menuControls, { + label: 'Pan sensitivity', + path: 'controls.panSensitivity', + min: 0.2, + max: 4, + step: 0.1, + defaultValue: controlDefaults.panSensitivity, + rebuild: false + }); + createSlider(dom.menuControls, { + label: 'Zoom damping', + path: 'controls.zoomDamping', + min: 0.6, + max: 0.98, + step: 0.01, + defaultValue: controlDefaults.zoomDamping, + format: (value) => value.toFixed(2), + rebuild: false + }); + createSlider(dom.menuControls, { + label: 'Zoom max', + path: 'controls.zoomMax', + min: 4, + max: 120, + step: 1, + defaultValue: controlDefaults.zoomMax, + rebuild: false + }); + createSlider(dom.menuControls, { + label: 'WASD sensitivity', + path: 'controls.wasd.sensitivity', + min: 100, + max: 50000, + step: 100, + defaultValue: controlDefaults.wasd.sensitivity, + rebuild: false + }); + createSlider(dom.menuControls, { + label: 'WASD accel', + path: 'controls.wasd.acceleration', + min: 100, + max: 20000, + step: 100, + defaultValue: controlDefaults.wasd.acceleration, + rebuild: false + }); + createSlider(dom.menuControls, { + label: 'WASD max', + path: 'controls.wasd.maxSpeed', + min: 100, + max: 60000, + step: 500, + defaultValue: controlDefaults.wasd.maxSpeed, + rebuild: false + }); + createSlider(dom.menuControls, { + label: 'WASD drag', + path: 'controls.wasd.drag', + min: 1, + max: 20, + step: 0.5, + defaultValue: controlDefaults.wasd.drag, + rebuild: false + }); + + createSelect(dom.menuLayout, { + label: 'Layout style', + value: getNested(state.panelState, 'layout.style'), + defaultValue: layoutDefaults.style, + options: [ + { label: 'Clustered', value: 'clustered' }, + { label: 'Flow', value: 'flow' }, + { label: 'Hex grid', value: 'hex' }, + { label: 'Radial', value: 'radial' }, + { label: 'Flat grid', value: 
'flat' }, + { label: 'Stream', value: 'stream' } + ], + onChange: (value) => { + setNested(state.panelState, 'layout.style', value); + syncStateFromPanel(); + scheduleRebuild(); + persistPanelState(); + } + }); + + createSelect(dom.menuLayout, { + label: 'File shapes', + value: getNested(state.panelState, 'layout.fileShape'), + defaultValue: layoutDefaults.fileShape, + options: [ + { label: 'Category', value: 'category' }, + { label: 'Mixed', value: 'mix' }, + { label: 'Square', value: 'square' }, + { label: 'Circle', value: 'circle' }, + { label: 'Pyramid', value: 'pyramid' }, + { label: 'Pentagon', value: 'pentagon' }, + { label: 'Hexagon', value: 'hexagon' }, + { label: 'Heptagon', value: 'heptagon' }, + { label: 'Octagon', value: 'octagon' }, + { label: 'Pentagon pyramid', value: 'pentagon-pyramid' }, + { label: 'Hexagon pyramid', value: 'hexagon-pyramid' }, + { label: 'Heptagon pyramid', value: 'heptagon-pyramid' }, + { label: 'Octagon pyramid', value: 'octagon-pyramid' }, + { label: 'Pentagon frustum', value: 'pentagon-frustum' }, + { label: 'Hexagon frustum', value: 'hexagon-frustum' }, + { label: 'Heptagon frustum', value: 'heptagon-frustum' }, + { label: 'Octagon frustum', value: 'octagon-frustum' } + ], + onChange: (value) => { + setNested(state.panelState, 'layout.fileShape', value); + syncStateFromPanel(); + scheduleRebuild(); + persistPanelState(); + } + }); + + createSelect(dom.menuLayout, { + label: 'Member shapes', + value: getNested(state.panelState, 'layout.memberShape'), + defaultValue: layoutDefaults.memberShape, + options: [ + { label: 'Category', value: 'category' }, + { label: 'Mixed', value: 'mix' }, + { label: 'Square', value: 'square' }, + { label: 'Circle', value: 'circle' }, + { label: 'Pyramid', value: 'pyramid' }, + { label: 'Pentagon', value: 'pentagon' }, + { label: 'Hexagon', value: 'hexagon' }, + { label: 'Heptagon', value: 'heptagon' }, + { label: 'Octagon', value: 'octagon' }, + { label: 'Pentagon pyramid', value: 'pentagon-pyramid' }, + { label: 'Hexagon pyramid', value: 'hexagon-pyramid' }, + { label: 'Heptagon pyramid', value: 'heptagon-pyramid' }, + { label: 'Octagon pyramid', value: 'octagon-pyramid' }, + { label: 'Pentagon frustum', value: 'pentagon-frustum' }, + { label: 'Hexagon frustum', value: 'hexagon-frustum' }, + { label: 'Heptagon frustum', value: 'heptagon-frustum' }, + { label: 'Octagon frustum', value: 'octagon-frustum' } + ], + onChange: (value) => { + setNested(state.panelState, 'layout.memberShape', value); + syncStateFromPanel(); + scheduleRebuild(); + persistPanelState(); + } + }); + + createSlider(dom.menuLayout, { + label: 'Group spacing', + path: 'layout.groupSpacing', + min: 0, + max: 16, + step: 0.5, + defaultValue: layoutDefaults.groupSpacing + }); + createSlider(dom.menuLayout, { + label: 'File spacing', + path: 'layout.fileSpacing', + min: 0, + max: 12, + step: 0.5, + defaultValue: layoutDefaults.fileSpacing + }); + createSlider(dom.menuLayout, { + label: 'Compactness', + path: 'layout.compactness', + min: 0.5, + max: 1.4, + step: 0.05, + defaultValue: layoutDefaults.compactness + }); + createSlider(dom.menuLayout, { + label: 'Routing padding', + path: 'layout.routingPadding', + min: 0, + max: 3, + step: 0.1, + defaultValue: layoutDefaults.routingPadding + }); + createSlider(dom.menuLayout, { + label: 'Routing step', + path: 'layout.routingStep', + min: 0.5, + max: 5, + step: 0.1, + defaultValue: layoutDefaults.routingStep + }); + createSlider(dom.menuLayout, { + label: 'Edge plane', + path: 'layout.edgePlane', + min: -4, + 
max: 0.5, + step: 0.05, + defaultValue: layoutDefaults.edgePlane + }); + createSlider(dom.menuLayout, { + label: 'Label size', + path: 'layout.labelScale', + min: 0.01, + max: 0.04, + step: 0.002, + defaultValue: layoutDefaults.labelScale, + format: (value) => value.toFixed(3) + }); + createSlider(dom.menuLayout, { + label: 'Label offset', + path: 'layout.labelOffset', + min: 0, + max: 1.5, + step: 0.05, + defaultValue: layoutDefaults.labelOffset + }); + + createSlider(dom.menuScore, { + label: 'Dataflow weight', + path: 'scoring.dataflow', + min: 0, + max: 2, + step: 0.05, + defaultValue: scoringDefaults.dataflow + }); + createSlider(dom.menuScore, { + label: 'Controlflow weight', + path: 'scoring.controlFlow', + min: 0, + max: 2, + step: 0.05, + defaultValue: scoringDefaults.controlFlow + }); + createSlider(dom.menuScore, { + label: 'Params weight', + path: 'scoring.params', + min: 0, + max: 1.5, + step: 0.05, + defaultValue: scoringDefaults.params + }); + createSlider(dom.menuScore, { + label: 'Signature weight', + path: 'scoring.signature', + min: 0, + max: 0.15, + step: 0.01, + defaultValue: scoringDefaults.signature, + format: (value) => value.toFixed(2) + }); + createSlider(dom.menuScore, { + label: 'Exported boost', + path: 'scoring.exported', + min: 0, + max: 3, + step: 0.1, + defaultValue: scoringDefaults.exported + }); + createSlider(dom.menuScore, { + label: 'Modifiers weight', + path: 'scoring.modifiers', + min: 0, + max: 1.5, + step: 0.05, + defaultValue: scoringDefaults.modifiers + }); + createSlider(dom.menuScore, { + label: 'Type weight', + path: 'scoring.type', + min: 0, + max: 2, + step: 0.05, + defaultValue: scoringDefaults.type + }); + createSlider(dom.menuScore, { + label: 'Returns weight', + path: 'scoring.returns', + min: 0, + max: 2, + step: 0.05, + defaultValue: scoringDefaults.returns + }); + + createSlider(dom.menuColors, { + label: 'Hue start', + path: 'colors.hueStart', + min: 0, + max: 1, + step: 0.01, + defaultValue: colorDefaults.hueStart, + format: (value) => value.toFixed(2) + }); + createSlider(dom.menuColors, { + label: 'Hue end', + path: 'colors.hueEnd', + min: 0, + max: 1, + step: 0.01, + defaultValue: colorDefaults.hueEnd, + format: (value) => value.toFixed(2) + }); + createSlider(dom.menuColors, { + label: 'Saturation', + path: 'colors.saturation', + min: 0.2, + max: 1, + step: 0.02, + defaultValue: colorDefaults.saturation, + format: (value) => value.toFixed(2) + }); + createSlider(dom.menuColors, { + label: 'Light min', + path: 'colors.lightnessMin', + min: 0.2, + max: 0.8, + step: 0.02, + defaultValue: colorDefaults.lightnessMin, + format: (value) => value.toFixed(2) + }); + createSlider(dom.menuColors, { + label: 'Light max', + path: 'colors.lightnessMax', + min: 0.3, + max: 0.95, + step: 0.02, + defaultValue: colorDefaults.lightnessMax, + format: (value) => value.toFixed(2) + }); + + createSelect(dom.menuColorMode, { + label: 'Color mode', + value: getNested(state.panelState, 'colors.mode'), + defaultValue: colorDefaults.mode || 'score', + options: [ + { label: 'Score gradient', value: 'score' }, + { label: 'Distinct (hash)', value: 'distinct' } + ], + onChange: (value) => { + setNested(state.panelState, 'colors.mode', value); + syncStateFromPanel(); + scheduleRebuild(); + persistPanelState(); + } + }); + createSlider(dom.menuColorMode, { + label: 'Distinct saturation', + path: 'colors.distinctSaturation', + min: 0.2, + max: 1, + step: 0.02, + defaultValue: colorDefaults.distinctSaturation, + format: (value) => value.toFixed(2) + }); + 
createSlider(dom.menuColorMode, { + label: 'Distinct lightness', + path: 'colors.distinctLightness', + min: 0.2, + max: 0.85, + step: 0.02, + defaultValue: colorDefaults.distinctLightness, + format: (value) => value.toFixed(2) + }); + createSlider(dom.menuColorMode, { + label: 'Distinct hue offset', + path: 'colors.distinctHueOffset', + min: 0, + max: 1, + step: 0.01, + defaultValue: colorDefaults.distinctHueOffset, + format: (value) => value.toFixed(2) + }); + + createSlider(dom.menuVisuals, { + label: 'File opacity', + path: 'visuals.fileOpacity', + min: 0.1, + max: 1, + step: 0.05, + defaultValue: visualDefaults.fileOpacity, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: updateFileOpacity + }); + createSlider(dom.menuVisuals, { + label: 'Member opacity', + path: 'visuals.memberOpacity', + min: 0.1, + max: 1, + step: 0.05, + defaultValue: visualDefaults.memberOpacity, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: updateMemberOpacity + }); + createSlider(dom.menuVisuals, { + label: 'Wireframe thickness', + path: 'visuals.wireframeThickness', + min: 0.01, + max: 10, + step: 0.02, + defaultValue: visualDefaults.wireframeThickness, + rebuild: false, + onInput: updateWireframes + }); + createSlider(dom.menuVisuals, { + label: 'Wireframe glow', + path: 'visuals.wireframeGlow', + min: 0, + max: 2.5, + step: 0.05, + defaultValue: visualDefaults.wireframeGlow, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: updateWireframes + }); + createSlider(dom.menuVisuals, { + label: 'Wire pulse speed', + path: 'visuals.wirePulseSpeed', + min: 0.02, + max: 1, + step: 0.02, + defaultValue: visualDefaults.wirePulseSpeed, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: updateWireframes + }); + createSlider(dom.menuVisuals, { + label: 'Flow glow base', + path: 'visuals.flowGlowBase', + min: 0, + max: 2, + step: 0.05, + defaultValue: visualDefaults.flowGlowBase, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: updateFlowGlow + }); + createSlider(dom.menuVisuals, { + label: 'Flow glow pulse', + path: 'visuals.flowGlowRange', + min: 0, + max: 2, + step: 0.05, + defaultValue: visualDefaults.flowGlowRange, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: updateFlowGlow + }); + createSlider(dom.menuVisuals, { + label: 'Glow speed', + path: 'visuals.glowPulseSpeed', + min: 0.4, + max: 4, + step: 0.1, + defaultValue: visualDefaults.glowPulseSpeed, + format: (value) => value.toFixed(1), + rebuild: false + }); + createSlider(dom.menuVisuals, { + label: 'Glass roughness', + path: 'visuals.glass.roughness', + min: 0, + max: 1, + step: 0.02, + defaultValue: visualDefaults.glass.roughness, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: applyGlassSettings + }); + createSlider(dom.menuVisuals, { + label: 'Glass metalness', + path: 'visuals.glass.metalness', + min: 0, + max: 1, + step: 0.02, + defaultValue: visualDefaults.glass.metalness, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: applyGlassSettings + }); + createSlider(dom.menuVisuals, { + label: 'Glass transmission', + path: 'visuals.glass.transmission', + min: 0, + max: 1, + step: 0.02, + defaultValue: visualDefaults.glass.transmission, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: applyGlassSettings + }); + createSlider(dom.menuVisuals, { + label: 'Glass IOR', + path: 'visuals.glass.ior', + min: 1, + max: 2.4, + step: 0.02, + defaultValue: visualDefaults.glass.ior, + format: (value) => 
value.toFixed(2), + rebuild: false, + onInput: applyGlassSettings + }); + createSlider(dom.menuVisuals, { + label: 'Glass reflectivity', + path: 'visuals.glass.reflectivity', + min: 0, + max: 1, + step: 0.02, + defaultValue: visualDefaults.glass.reflectivity, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: applyGlassSettings + }); + createSlider(dom.menuVisuals, { + label: 'Glass thickness', + path: 'visuals.glass.thickness', + min: 0.1, + max: 10, + step: 0.1, + defaultValue: visualDefaults.glass.thickness, + format: (value) => value.toFixed(1), + rebuild: false, + onInput: applyGlassSettings + }); + createSlider(dom.menuVisuals, { + label: 'Env intensity', + path: 'visuals.glass.envMapIntensity', + min: 0, + max: 8, + step: 0.1, + defaultValue: visualDefaults.glass.envMapIntensity, + format: (value) => value.toFixed(1), + rebuild: false, + onInput: applyGlassSettings + }); + createSlider(dom.menuVisuals, { + label: 'Clearcoat', + path: 'visuals.glass.clearcoat', + min: 0, + max: 1, + step: 0.02, + defaultValue: visualDefaults.glass.clearcoat, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: applyGlassSettings + }); + createSlider(dom.menuVisuals, { + label: 'Clearcoat rough', + path: 'visuals.glass.clearcoatRoughness', + min: 0, + max: 1, + step: 0.02, + defaultValue: visualDefaults.glass.clearcoatRoughness, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: applyGlassSettings + }); + createSlider(dom.menuVisuals, { + label: 'Normal scale', + path: 'visuals.glass.normalScale', + min: 0, + max: 2, + step: 0.05, + defaultValue: visualDefaults.glass.normalScale, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: applyGlassSettings + }); + createSlider(dom.menuVisuals, { + label: 'Clearcoat normal', + path: 'visuals.glass.clearcoatNormalScale', + min: 0, + max: 2, + step: 0.05, + defaultValue: visualDefaults.glass.clearcoatNormalScale, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: applyGlassSettings + }); + createSlider(dom.menuVisuals, { + label: 'Normal repeat', + path: 'visuals.glass.normalRepeat', + min: 1, + max: 6, + step: 1, + defaultValue: visualDefaults.glass.normalRepeat, + rebuild: false, + onInput: applyGlassSettings + }); + + createToggle(dom.menuEffects, { + label: 'Fog', + checked: visuals.enableFog !== false, + onChange: (value) => { + setNested(state.panelState, 'visuals.enableFog', value); + syncStateFromPanel(); + updateFog(); + persistPanelState(); + } + }); + createToggle(dom.menuEffects, { + label: 'Height fog', + checked: visuals.enableHeightFog === true, + onChange: (value) => { + setNested(state.panelState, 'visuals.enableHeightFog', value); + syncStateFromPanel(); + updateFog(); + persistPanelState(); + } + }); + createSlider(dom.menuEffects, { + label: 'Fog distance', + path: 'visuals.fogDistance', + min: 1.2, + max: 4, + step: 0.1, + defaultValue: visualDefaults.fogDistance, + format: (value) => value.toFixed(1), + rebuild: false, + onInput: () => updateFog() + }); + createSlider(dom.menuEffects, { + label: 'Fog height', + path: 'visuals.fogHeight', + min: 0, + max: 40, + step: 0.5, + defaultValue: visualDefaults.fogHeight, + format: (value) => value.toFixed(1), + rebuild: false, + onInput: () => updateFog() + }); + createSlider(dom.menuEffects, { + label: 'Fog height range', + path: 'visuals.fogHeightRange', + min: 4, + max: 40, + step: 0.5, + defaultValue: visualDefaults.fogHeightRange, + format: (value) => value.toFixed(1), + rebuild: false, + onInput: () => 
updateFog() + }); + createToggle(dom.menuEffects, { + label: 'Flow lights', + checked: visuals.enableFlowLights !== false, + onChange: (value) => { + setNested(state.panelState, 'visuals.enableFlowLights', value); + syncStateFromPanel(); + updateFlowLights(); + persistPanelState(); + } + }); + createToggle(dom.menuEffects, { + label: 'Extra lights', + checked: visuals.enableExtraLights !== false, + onChange: (value) => { + setNested(state.panelState, 'visuals.enableExtraLights', value); + syncStateFromPanel(); + updateExtraLights(); + persistPanelState(); + } + }); + createSlider(dom.menuEffects, { + label: 'Grid glow base', + path: 'visuals.gridGlowBase', + min: 0, + max: 0.6, + step: 0.02, + defaultValue: visualDefaults.gridGlowBase, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: updateGridGlow + }); + createSlider(dom.menuEffects, { + label: 'Grid glow pulse', + path: 'visuals.gridGlowRange', + min: 0, + max: 1, + step: 0.02, + defaultValue: visualDefaults.gridGlowRange, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: updateGridGlow + }); + createSlider(dom.menuEffects, { + label: 'Grid glow speed', + path: 'visuals.gridPulseSpeed', + min: 0.1, + max: 1, + step: 0.05, + defaultValue: visualDefaults.gridPulseSpeed, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: updateGridGlow + }); + createSlider(dom.menuEffects, { + label: 'Grid line thickness', + path: 'visuals.gridLineThickness', + min: 0.02, + max: 6, + step: 0.05, + defaultValue: visualDefaults.gridLineThickness, + format: (value) => value.toFixed(2), + rebuild: false, + onInput: updateGridGlow + }); + + createButton(dom.menuActions, 'Save settings', () => { + persistPanelState(); + }); + createButton(dom.menuActions, 'Reset to defaults', () => { + try { + window.localStorage.removeItem(storageKey); + } catch (err) { + // ignore storage failures + } + window.location.reload(); + }); + + state.syncStateFromPanel = syncStateFromPanel; + state.renderEdgeMenu = renderEdgeMenu; + renderSelectionDetails(state.selected?.userData || null); +}; diff --git a/src/map/isometric/client/utils.js b/src/map/isometric/client/utils.js new file mode 100644 index 000000000..faf2f1df6 --- /dev/null +++ b/src/map/isometric/client/utils.js @@ -0,0 +1,15 @@ +export const numberValue = (value, fallback) => { + const parsed = Number(value); + return Number.isFinite(parsed) ? 
parsed : fallback; +}; + +export const clamp = (value, min, max) => Math.max(min, Math.min(max, value)); + +export const hashString = (value) => { + const text = String(value || ''); + let hash = 0; + for (let i = 0; i < text.length; i += 1) { + hash = (hash * 31 + text.charCodeAt(i)) | 0; + } + return hash >>> 0; +}; diff --git a/src/map/isometric/client/viewer-app.js b/src/map/isometric/client/viewer-app.js new file mode 100644 index 000000000..f5f3c338b --- /dev/null +++ b/src/map/isometric/client/viewer-app.js @@ -0,0 +1,120 @@ +import { state } from './state.js'; +import { loadDomConfig } from './dom.js'; +import { loadThreeModules, loadRgbeLoader } from './three-loader.js'; +import { + assetDefaults, + colorDefaults, + controlDefaults, + flowTypeProfiles, + flowWaveLayers, + layoutDefaults, + scoringDefaults, + visualDefaults +} from './defaults.js'; +import { initScene } from './scene.js'; +import { initMapData } from './map-data.js'; +import { initMaterials } from './materials.js'; +import { initUi } from './ui.js'; +import { rebuildScene, scheduleRebuild } from './rebuild.js'; +import { initControls } from './controls.js'; + +const initViewer = async () => { + const { map, config, dom } = loadDomConfig(); + + if (!config.threeUrl) { + dom.selectionBody.textContent = 'Missing three.js module reference.'; + throw new Error('threeUrl missing'); + } + + const { THREE, LineSegments2, LineSegmentsGeometry, LineMaterial } = await loadThreeModules(config.threeUrl); + + const layout = { ...layoutDefaults, ...(config.layout || {}) }; + const scoring = { ...scoringDefaults, ...(config.scoring || {}) }; + const colors = { ...colorDefaults, ...(config.colors || {}) }; + const visuals = { ...visualDefaults, ...(config.visuals || {}) }; + visuals.glass = { ...visualDefaults.glass, ...(config.visuals?.glass || {}) }; + const assets = { ...assetDefaults, ...(config.assets || {}) }; + const controls = { + ...controlDefaults, + ...(config.controls || {}), + wasd: { + ...controlDefaults.wasd, + ...(config.controls?.wasd || {}) + } + }; + + const flowWaveTotal = + flowWaveLayers.reduce((acc, layer) => acc + layer.amplitude, 0) || 1; + const RGBELoader = await loadRgbeLoader(assets.rgbeLoaderUrl); + + Object.assign(state, { + map, + config, + dom, + THREE, + LineSegments2, + LineSegmentsGeometry, + LineMaterial, + RGBELoader, + layout, + scoring, + colors, + visuals, + assets, + controls, + layoutDefaults, + scoringDefaults, + colorDefaults, + visualDefaults, + controlDefaults, + flowWaveLayers, + flowWaveTotal, + flowTypeProfiles, + edgeVisibility: new Map(), + gridVisible: true, + hoveredRef: null, + hoveredMesh: null, + selected: null, + fileMeshes: [], + memberMeshes: [], + chunkMeshes: [], + fileChunkMeshes: [], + fileAnchors: new Map(), + memberAnchors: new Map(), + fileMeshByKey: new Map(), + memberMeshById: new Map(), + fileColorByPath: new Map(), + memberColorById: new Map(), + wireByMesh: new Map(), + edgeMeshes: [], + edgeSegments: [], + edgeDotMesh: null, + edgeDotMaterial: null, + edgeTypeGroups: new Map(), + edgeTypes: [], + flowLights: [], + wireMaterials: [], + gridLineMaterials: [], + labelMaterials: [], + glassMaterials: [], + glassShells: [], + glowMaterials: [], + flowMaterials: [], + normalMapState: { texture: null } + }); + + const counts = map.summary?.counts || { files: 0, members: 0, edges: 0 }; + dom.summary.textContent = + `files: ${counts.files || 0} | members: ${counts.members || 0}` + + ` | edges: ${counts.edges || 0}`; + + await initScene(); + initMapData(); + 
initMaterials(); + initUi(); + rebuildScene(); + initControls(); + state.scheduleRebuild = scheduleRebuild; +}; + +initViewer(); diff --git a/src/map/isometric/client/viewer.js b/src/map/isometric/client/viewer.js new file mode 100644 index 000000000..f519c4f93 --- /dev/null +++ b/src/map/isometric/client/viewer.js @@ -0,0 +1 @@ +import './viewer-app.js'; diff --git a/src/map/utils.js b/src/map/utils.js new file mode 100644 index 000000000..b568f02a9 --- /dev/null +++ b/src/map/utils.js @@ -0,0 +1,51 @@ +import path from 'node:path'; +import { FILE_CATEGORY_RULES } from './constants.js'; + +export const normalizePath = (value) => String(value || '').replace(/\\/g, '/'); + +export const basename = (value) => { + if (!value) return ''; + return normalizePath(path.basename(value)); +}; + +export const extension = (value) => { + if (!value) return ''; + const ext = path.extname(value); + return ext || ''; +}; + +export const classifyFilePath = (filePath) => { + const normalized = normalizePath(filePath || ''); + if (!normalized) return 'other'; + + const lower = normalized.toLowerCase(); + const ext = extension(lower); + + const isMatch = (rule) => { + if (!rule) return false; + if (rule.extensions && rule.extensions.some((entry) => lower.includes(entry + '.'))) + return true; + if (rule.extensions && rule.extensions.includes(ext)) return true; + if (rule.names && rule.names.some((name) => lower.includes('/' + name + '/'))) return true; + if (rule.patterns && rule.patterns.some((pattern) => pattern.test(lower))) return true; + return false; + }; + + if (isMatch(FILE_CATEGORY_RULES.generated)) return 'generated'; + if (isMatch(FILE_CATEGORY_RULES.test)) return 'test'; + if (isMatch(FILE_CATEGORY_RULES.docs)) return 'docs'; + if (isMatch(FILE_CATEGORY_RULES.config)) return 'config'; + return 'source'; +}; + +export const sortBy = (list, keyFn) => { + return list.slice().sort((a, b) => { + const left = keyFn(a); + const right = keyFn(b); + return String(left).localeCompare(String(right)); + }); +}; + +export const unique = (values) => Array.from(new Set((values || []).filter(Boolean))); + +export const clamp = (value, min, max) => Math.min(max, Math.max(min, value)); diff --git a/src/retrieval/bitmap.js b/src/retrieval/bitmap.js new file mode 100644 index 000000000..3cf15c599 --- /dev/null +++ b/src/retrieval/bitmap.js @@ -0,0 +1,207 @@ +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const DEFAULT_MIN_SIZE = 256; +let roaringLib = null; +let roaringChecked = false; + +const resolveRoaring = () => { + if (roaringChecked) return roaringLib; + roaringChecked = true; + try { + roaringLib = require('roaring-wasm'); + } catch { + roaringLib = null; + } + return roaringLib; +}; + +const resolveBitmapClass = () => { + const lib = resolveRoaring(); + if (!lib) return null; + return lib.RoaringBitmap32 + || lib.RoaringBitmap + || lib.default?.RoaringBitmap32 + || lib.default?.RoaringBitmap + || lib.default + || null; +}; + +const normalizeIds = (values) => { + if (!values) return []; + const list = Array.isArray(values) ? 
values : Array.from(values); + const ids = []; + for (const value of list) { + const parsed = Number(value); + if (!Number.isFinite(parsed)) continue; + const id = Math.floor(parsed); + if (id < 0) continue; + ids.push(id); + } + ids.sort((a, b) => a - b); + const deduped = []; + let last = null; + for (const id of ids) { + if (id === last) continue; + deduped.push(id); + last = id; + } + return deduped; +}; + +const cloneBitmap = (bitmap) => { + if (!bitmap) return null; + if (typeof bitmap.clone === 'function') return bitmap.clone(); + const ids = bitmapToArray(bitmap); + return createBitmapFromIds(ids, { force: true }); +}; + +const bitmapHas = (bitmap, value) => { + if (!bitmap) return false; + if (typeof bitmap.has === 'function') return bitmap.has(value); + if (typeof bitmap.contains === 'function') return bitmap.contains(value); + if (typeof bitmap.includes === 'function') return bitmap.includes(value); + return false; +}; + +const getBitmapSize = (bitmap) => { + if (!bitmap) return 0; + if (Number.isFinite(bitmap.size)) return bitmap.size; + if (typeof bitmap.size === 'function') return bitmap.size(); + if (typeof bitmap.getSize === 'function') return bitmap.getSize(); + return bitmapToArray(bitmap).length; +}; + +export const isBitmapEmpty = (bitmap) => getBitmapSize(bitmap) === 0; + +export const isRoaringAvailable = () => Boolean(resolveBitmapClass()); + +export const shouldUseBitmap = (size, minSize = DEFAULT_MIN_SIZE) => ( + Number.isFinite(size) && size >= minSize +); + +export const bitmapToArray = (bitmap) => { + if (!bitmap) return []; + if (typeof bitmap.toArray === 'function') return bitmap.toArray(); + if (typeof bitmap.toArraySync === 'function') return bitmap.toArraySync(); + if (typeof bitmap.values === 'function') return Array.from(bitmap.values()); + return Array.from(bitmap || []); +}; + +export const bitmapToSet = (bitmap) => new Set(bitmapToArray(bitmap)); + +export const createBitmapFromIds = (values, options = {}) => { + const Bitmap = resolveBitmapClass(); + if (!Bitmap) return null; + const minSize = Number.isFinite(Number(options.minSize)) + ? 
Math.max(1, Math.floor(Number(options.minSize))) + : DEFAULT_MIN_SIZE; + const force = options.force === true; + const ids = normalizeIds(values); + if (!ids.length) return null; + if (!force && !shouldUseBitmap(ids.length, minSize)) return null; + let bitmap = null; + if (typeof Bitmap.from === 'function') { + bitmap = Bitmap.from(ids); + } else { + bitmap = new Bitmap(); + if (typeof bitmap.addMany === 'function') { + bitmap.addMany(ids); + } else { + for (const id of ids) bitmap.add(id); + } + } + return bitmap; +}; + +export const unionBitmaps = (bitmaps) => { + if (!Array.isArray(bitmaps) || !bitmaps.length) return null; + let acc = cloneBitmap(bitmaps[0]); + for (let i = 1; i < bitmaps.length; i += 1) { + const next = bitmaps[i]; + if (!next || !acc) continue; + if (typeof acc.orInPlace === 'function') { + acc.orInPlace(next); + } else if (typeof acc.or === 'function') { + acc = acc.or(next); + } else if (typeof acc.union === 'function') { + acc = acc.union(next); + } else { + const merged = [...bitmapToArray(acc), ...bitmapToArray(next)]; + acc = createBitmapFromIds(merged, { force: true }); + } + } + return acc; +}; + +export const intersectBitmaps = (bitmaps) => { + if (!Array.isArray(bitmaps) || !bitmaps.length) return null; + let acc = cloneBitmap(bitmaps[0]); + for (let i = 1; i < bitmaps.length; i += 1) { + const next = bitmaps[i]; + if (!next || !acc) continue; + if (typeof acc.andInPlace === 'function') { + acc.andInPlace(next); + } else if (typeof acc.and === 'function') { + acc = acc.and(next); + } else if (typeof acc.intersect === 'function') { + acc = acc.intersect(next); + } else { + const left = bitmapToArray(acc); + const right = new Set(bitmapToArray(next)); + const merged = []; + for (const id of left) { + if (right.has(id)) merged.push(id); + } + acc = createBitmapFromIds(merged, { force: true }); + } + if (!acc || isBitmapEmpty(acc)) return acc; + } + return acc; +}; + +export const intersectSetWithBitmap = (set, bitmap) => { + const out = new Set(); + if (!set || !bitmap) return out; + const hasMethod = typeof bitmap.has === 'function' + || typeof bitmap.contains === 'function' + || typeof bitmap.includes === 'function'; + if (!hasMethod) { + const bitmapSet = bitmapToSet(bitmap); + for (const id of set) { + if (bitmapSet.has(id)) out.add(id); + } + return out; + } + for (const id of set) { + if (bitmapHas(bitmap, id)) out.add(id); + } + return out; +}; + +export const buildBitmapIndex = (index, options = {}) => { + const Bitmap = resolveBitmapClass(); + if (!Bitmap || !index) return null; + const minSize = Number.isFinite(Number(options.minSize)) + ? 
Math.max(1, Math.floor(Number(options.minSize))) + : DEFAULT_MIN_SIZE; + const buildMap = (source) => { + const out = new Map(); + if (!source || typeof source.entries !== 'function') return out; + for (const [key, set] of source.entries()) { + if (!set || !shouldUseBitmap(set.size, minSize)) continue; + const bitmap = createBitmapFromIds(set, { force: true, minSize }); + if (bitmap) out.set(key, bitmap); + } + return out; + }; + return { + enabled: true, + minSize, + byExt: buildMap(index.byExt), + byKind: buildMap(index.byKind), + byAuthor: buildMap(index.byAuthor), + byChunkAuthor: buildMap(index.byChunkAuthor), + byVisibility: buildMap(index.byVisibility) + }; +}; diff --git a/src/retrieval/cli-args.js b/src/retrieval/cli-args.js new file mode 100644 index 000000000..acaba1859 --- /dev/null +++ b/src/retrieval/cli-args.js @@ -0,0 +1,177 @@ +import yargs from 'yargs/yargs'; + +const BOOLEAN_FLAGS = [ + 'json', + 'json-compact', + 'stats', + 'ann', + 'lint', + 'matched', + 'async', + 'generator', + 'returns', + 'explain', + 'why', + 'case', + 'case-file', + 'case-tokens' +]; + +const STRING_FLAGS = [ + 'type', + 'author', + 'import', + 'calls', + 'uses', + 'signature', + 'param', + 'decorator', + 'inferred-type', + 'return-type', + 'throws', + 'reads', + 'writes', + 'mutates', + 'churn', + 'alias', + 'awaits', + 'branches', + 'loops', + 'breaks', + 'continues', + 'risk', + 'risk-tag', + 'risk-source', + 'risk-sink', + 'risk-category', + 'risk-flow', + 'struct-pack', + 'struct-rule', + 'struct-tag', + 'meta', + 'meta-json', + 'file', + 'ext', + 'lang', + 'chunk-author', + 'modified-after', + 'modified-since', + 'visibility', + 'extends', + 'mode', + 'backend', + 'path', + 'model', + 'repo', + 'branch', + 'fts-profile', + 'fts-weights', + 'bm25-k1', + 'bm25-b', + 'profile' +]; + +const ALIASES = { n: 'top', c: 'context', t: 'type', why: 'explain' }; +const DEFAULTS = { n: 5, context: 3 }; + +/** + * Parse CLI arguments for search. + * @param {string[]} rawArgs + * @returns {object} + */ +export function parseSearchArgs(rawArgs) { + const removedFlags = [ + { flag: '--human', replacement: '--json | --json-compact' }, + { flag: '--headline', replacement: '--matched' } + ]; + const removed = removedFlags.filter((entry) => + rawArgs.some((arg) => arg === entry.flag || arg.startsWith(`${entry.flag}=`)) + ); + if (removed.length) { + const details = removed + .map((entry) => `${entry.flag} was removed (use ${entry.replacement}).`) + .join(' '); + const error = new Error(details); + error.code = 'REMOVED_FLAG'; + throw error; + } + const options = { + n: { type: 'number', default: DEFAULTS.n }, + context: { type: 'number', default: DEFAULTS.context } + }; + for (const flag of BOOLEAN_FLAGS) { + options[flag] = { type: 'boolean' }; + } + for (const flag of STRING_FLAGS) { + options[flag] = { type: 'string' }; + } + const argv = yargs(rawArgs) + .parserConfiguration({ + 'camel-case-expansion': false, + 'dot-notation': false + }) + .options(options) + .alias(ALIASES) + .help() + .alias('h', 'help') + .parse(); + if (argv.profile) { + process.env.PAIROFCLEATS_PROFILE = String(argv.profile).trim(); + } + return argv; +} + +/** + * Build a usage string for search CLI. 
+ * @returns {string} + */ +export function getSearchUsage() { + return [ + 'usage: search "query" [options]', + '', + 'Options:', + ' --repo ', + ' --mode code|prose|both|records|all|extracted-prose', + ' --backend auto|memory|sqlite|sqlite-fts|lmdb', + ' --top N, --context N', + ' --json | --json-compact | --stats', + ' --ann | --no-ann', + ' --model ', + ' --fts-profile | --fts-weights ', + ' --bm25-k1 | --bm25-b ', + ' --profile ', + ' --matched | --explain | --why', + ' Filters:', + ' --type --author --import --calls --uses ', + ' --signature --param --decorator --inferred-type --return-type ', + ' --throws --reads --writes --mutates --alias --awaits ', + ' --branches --loops --breaks --continues ', + ' --risk --risk-tag --risk-source --risk-sink --risk-category --risk-flow ', + ' --struct-pack --struct-rule --struct-tag ', + ' --visibility --extends --async --generator --returns --lint', + ' --churn [min] --modified-after --modified-since --chunk-author ', + ' --path --file --ext <.ext> --lang --branch ', + ' --case --case-file --case-tokens', + ' --meta --meta-json ' + ].join('\n'); +} + +/** + * Resolve the requested search mode and derived flags. + * @param {string|undefined} modeRaw + * @returns {{searchMode:string,runCode:boolean,runProse:boolean,runRecords:boolean,runExtractedProse:boolean}} + */ +export function resolveSearchMode(modeRaw) { + const searchMode = String(modeRaw || 'both').toLowerCase(); + const allowedModes = new Set(['code', 'prose', 'both', 'records', 'all', 'extracted-prose']); + if (!allowedModes.has(searchMode)) { + const error = new Error(`Invalid --mode ${searchMode}. Use code|prose|both|records|all|extracted-prose.`); + error.code = 'INVALID_MODE'; + throw error; + } + const runCode = searchMode === 'code' || searchMode === 'both' || searchMode === 'all'; + const runProse = searchMode === 'prose' || searchMode === 'both' || searchMode === 'all'; + const runRecords = searchMode === 'records' || searchMode === 'all'; + const runExtractedProse = searchMode === 'extracted-prose' || searchMode === 'all'; + return { searchMode, runCode, runProse, runRecords, runExtractedProse }; +} diff --git a/src/retrieval/cli-dictionary.js b/src/retrieval/cli-dictionary.js new file mode 100644 index 000000000..6eec9cefd --- /dev/null +++ b/src/retrieval/cli-dictionary.js @@ -0,0 +1,24 @@ +import fsSync from 'node:fs'; +import { getDictionaryPaths } from '../../tools/dict-utils.js'; + +/** + * Load dictionary files into a normalized Set. 
+ * @param {string} root + * @param {object} dictConfig + * @returns {Promise<{dict:Set, dictionaryPaths:string[]}>} + */ +export async function loadDictionary(root, dictConfig) { + const dictionaryPaths = await getDictionaryPaths(root, dictConfig); + const dict = new Set(); + for (const dictFile of dictionaryPaths) { + try { + const contents = fsSync.readFileSync(dictFile, 'utf8'); + contents + .split(/\r?\n/) + .map((word) => word.trim().toLowerCase()) + .filter(Boolean) + .forEach((word) => dict.add(word)); + } catch {} + } + return { dict, dictionaryPaths }; +} diff --git a/src/retrieval/cli-index.js b/src/retrieval/cli-index.js new file mode 100644 index 000000000..24ab8ae58 --- /dev/null +++ b/src/retrieval/cli-index.js @@ -0,0 +1,334 @@ +import fsSync from 'node:fs'; +import path from 'node:path'; +import crypto from 'node:crypto'; +import { getIndexDir } from '../../tools/dict-utils.js'; +import { buildFilterIndex, hydrateFilterIndex } from './filter-index.js'; +import { createError, ERROR_CODES } from '../shared/error-codes.js'; +import { + MAX_JSON_BYTES, + loadChunkMeta, + loadTokenPostings, + readJsonFile +} from '../shared/artifact-io.js'; +import { loadHnswIndex, normalizeHnswConfig, resolveHnswPaths, validateHnswMetaCompatibility } from '../shared/hnsw.js'; + +/** + * Load file-backed index artifacts from a directory. + * @param {string} dir + * @param {{modelIdDefault:string}} options + * @returns {object} + */ +export function loadIndex(dir, options) { + const { + modelIdDefault, + fileChargramN, + includeHnsw = true, + hnswConfig: rawHnswConfig + } = options || {}; + const hnswConfig = normalizeHnswConfig(rawHnswConfig || {}); + const readJson = (name) => { + const filePath = path.join(dir, name); + return readJsonFile(filePath, { maxBytes: MAX_JSON_BYTES }); + }; + const loadOptional = (name) => { + try { + return readJson(name); + } catch (err) { + if (err?.code === 'ERR_JSON_TOO_LARGE') { + console.warn( + `[search] Skipping ${name}: ${err.message} Use sqlite backend for large repos.` + ); + } + return null; + } + }; + const chunkMeta = loadChunkMeta(dir, { maxBytes: MAX_JSON_BYTES }); + const fileMetaRaw = loadOptional('file_meta.json'); + let fileMetaById = null; + if (Array.isArray(fileMetaRaw)) { + fileMetaById = new Map(); + for (const entry of fileMetaRaw) { + if (!entry || entry.id == null) continue; + fileMetaById.set(entry.id, entry); + } + } + if (!fileMetaById) { + const missingMeta = chunkMeta.some((chunk) => chunk && chunk.fileId != null && !chunk.file); + if (missingMeta) { + throw new Error('file_meta.json is required for fileId-based chunk metadata.'); + } + } else { + for (const chunk of chunkMeta) { + if (!chunk || (chunk.file && chunk.ext)) continue; + const meta = fileMetaById.get(chunk.fileId); + if (!meta) continue; + if (!chunk.file) chunk.file = meta.file; + if (!chunk.ext) chunk.ext = meta.ext; + if (!chunk.externalDocs) chunk.externalDocs = meta.externalDocs; + if (!chunk.last_modified) chunk.last_modified = meta.last_modified; + if (!chunk.last_author) chunk.last_author = meta.last_author; + if (!chunk.churn) chunk.churn = meta.churn; + if (!chunk.churn_added) chunk.churn_added = meta.churn_added; + if (!chunk.churn_deleted) chunk.churn_deleted = meta.churn_deleted; + if (!chunk.churn_commits) chunk.churn_commits = meta.churn_commits; + } + } + const fileRelationsRaw = loadOptional('file_relations.json'); + const repoMap = loadOptional('repo_map.json'); + let fileRelations = null; + if (Array.isArray(fileRelationsRaw)) { + const map = new 
Map(); + for (const entry of fileRelationsRaw) { + if (!entry || !entry.file) continue; + map.set(entry.file, entry.relations || null); + } + fileRelations = map; + } + const indexState = loadOptional('index_state.json'); + const embeddingsState = indexState?.embeddings || null; + const embeddingsReady = embeddingsState?.ready !== false && embeddingsState?.pending !== true; + const denseVec = embeddingsReady ? loadOptional('dense_vectors_uint8.json') : null; + const denseVecDoc = embeddingsReady ? loadOptional('dense_vectors_doc_uint8.json') : null; + const denseVecCode = embeddingsReady ? loadOptional('dense_vectors_code_uint8.json') : null; + if (denseVec && !denseVec.model && modelIdDefault) denseVec.model = modelIdDefault; + if (denseVecDoc && !denseVecDoc.model && modelIdDefault) denseVecDoc.model = modelIdDefault; + if (denseVecCode && !denseVecCode.model && modelIdDefault) denseVecCode.model = modelIdDefault; + const hnswMeta = embeddingsReady && includeHnsw && hnswConfig.enabled + ? loadOptional('dense_vectors_hnsw.meta.json') + : null; + let hnswIndex = null; + let hnswAvailable = false; + if (hnswMeta && includeHnsw && hnswConfig.enabled) { + const compatibility = validateHnswMetaCompatibility({ denseVectors: denseVec, hnswMeta }); + if (!compatibility.ok) { + console.warn(`[ann] Skipping HNSW index load due to incompatible metadata: ${compatibility.warnings.join('; ')}`); + } else { + const { indexPath } = resolveHnswPaths(dir); + const mergedConfig = { + ...hnswConfig, + space: hnswMeta.space || hnswConfig.space, + efSearch: hnswMeta.efSearch || hnswConfig.efSearch + }; + hnswIndex = loadHnswIndex({ indexPath, dims: hnswMeta.dims, config: mergedConfig }); + hnswAvailable = Boolean(hnswIndex); + } + } + const fieldPostings = loadOptional('field_postings.json'); + const fieldTokens = loadOptional('field_tokens.json'); + const filterIndexRaw = loadOptional('filter_index.json'); + const idx = { + chunkMeta, + fileRelations, + repoMap, + denseVec, + denseVecDoc, + denseVecCode, + hnsw: hnswMeta ? { + available: hnswAvailable, + index: hnswIndex, + meta: hnswMeta, + space: hnswMeta.space || hnswConfig.space + } : { available: false, index: null, meta: null, space: hnswConfig.space }, + state: indexState, + fieldPostings, + fieldTokens, + minhash: loadOptional('minhash_signatures.json'), + phraseNgrams: loadOptional('phrase_ngrams.json'), + chargrams: loadOptional('chargram_postings.json') + }; + if (idx.phraseNgrams?.vocab && !idx.phraseNgrams.vocabIndex) { + idx.phraseNgrams.vocabIndex = new Map(idx.phraseNgrams.vocab.map((term, i) => [term, i])); + } + if (idx.chargrams?.vocab && !idx.chargrams.vocabIndex) { + idx.chargrams.vocabIndex = new Map(idx.chargrams.vocab.map((term, i) => [term, i])); + } + if (idx.fieldPostings?.fields) { + for (const field of Object.keys(idx.fieldPostings.fields)) { + const entry = idx.fieldPostings.fields[field]; + if (!entry?.vocab || entry.vocabIndex) continue; + entry.vocabIndex = new Map(entry.vocab.map((term, i) => [term, i])); + } + } + idx.filterIndex = filterIndexRaw + ? (hydrateFilterIndex(filterIndexRaw) || buildFilterIndex(chunkMeta, { fileChargramN })) + : buildFilterIndex(chunkMeta, { fileChargramN }); + try { + idx.tokenIndex = loadTokenPostings(dir, { maxBytes: MAX_JSON_BYTES }); + } catch {} + return idx; +} + +/** + * Resolve the index directory (cache-first, local fallback). 
+ * @param {string} root + * @param {'code'|'prose'|'records'|'extracted-prose'} mode + * @param {object} userConfig + * @returns {string} + */ +export function resolveIndexDir(root, mode, userConfig) { + const cached = getIndexDir(root, mode, userConfig); + const cachedMeta = path.join(cached, 'chunk_meta.json'); + const cachedMetaJsonl = path.join(cached, 'chunk_meta.jsonl'); + const cachedMetaParts = path.join(cached, 'chunk_meta.meta.json'); + const cachedPartsDir = path.join(cached, 'chunk_meta.parts'); + if (fsSync.existsSync(cachedMeta) + || fsSync.existsSync(cachedMetaJsonl) + || fsSync.existsSync(cachedMetaParts) + || fsSync.existsSync(cachedPartsDir)) { + return cached; + } + const local = path.join(root, `index-${mode}`); + const localMeta = path.join(local, 'chunk_meta.json'); + const localMetaJsonl = path.join(local, 'chunk_meta.jsonl'); + const localMetaParts = path.join(local, 'chunk_meta.meta.json'); + const localPartsDir = path.join(local, 'chunk_meta.parts'); + if (fsSync.existsSync(localMeta) + || fsSync.existsSync(localMetaJsonl) + || fsSync.existsSync(localMetaParts) + || fsSync.existsSync(localPartsDir)) { + return local; + } + return cached; +} + +/** + * Ensure a file-backed index exists for a mode. + * @param {string} root + * @param {'code'|'prose'|'records'|'extracted-prose'} mode + * @param {object} userConfig + * @returns {string} + */ +export function requireIndexDir(root, mode, userConfig, options = {}) { + const dir = resolveIndexDir(root, mode, userConfig); + const metaPath = path.join(dir, 'chunk_meta.json'); + const metaJsonlPath = path.join(dir, 'chunk_meta.jsonl'); + const metaPartsPath = path.join(dir, 'chunk_meta.meta.json'); + const metaPartsDir = path.join(dir, 'chunk_meta.parts'); + if (!fsSync.existsSync(metaPath) + && !fsSync.existsSync(metaJsonlPath) + && !fsSync.existsSync(metaPartsPath) + && !fsSync.existsSync(metaPartsDir)) { + const suffix = (mode === 'records' || mode === 'extracted-prose') + ? ` --mode ${mode}` + : ''; + const message = `[search] ${mode} index not found at ${dir}. Run "pairofcleats index build${suffix}" or "npm run build-index${suffix}".`; + const emitOutput = options.emitOutput !== false; + const exitOnError = options.exitOnError !== false; + if (emitOutput) console.error(message); + if (exitOnError) process.exit(1); + throw createError(ERROR_CODES.NO_INDEX, message); + } + return dir; +} + +/** + * Build a deterministic cache key for the current query + settings. + * @param {object} payload + * @returns {{key:string,payload:object}} + */ +export function buildQueryCacheKey(payload) { + const raw = JSON.stringify(payload); + const key = crypto.createHash('sha1').update(raw).digest('hex'); + return { key, payload }; +} + +/** + * Build a signature payload for cache invalidation. + * @param {object} options + * @returns {object} + */ +export function getIndexSignature(options) { + const { + useSqlite, + backendLabel, + sqliteCodePath, + sqliteProsePath, + runRecords, + runExtractedProse, + root, + userConfig + } = options; + const fileSignature = (filePath) => { + try { + let statPath = filePath; + if (!fsSync.existsSync(statPath) && filePath.endsWith('.json')) { + const gzPath = `${filePath}.gz`; + if (fsSync.existsSync(gzPath)) statPath = gzPath; + } + const stat = fsSync.statSync(statPath); + return `${stat.size}:${stat.mtimeMs}`; + } catch { + return null; + } + }; + + const extractedProseDir = runExtractedProse + ? 
resolveIndexDir(root, 'extracted-prose', userConfig) + : null; + const extractedProseMeta = extractedProseDir ? path.join(extractedProseDir, 'chunk_meta.json') : null; + const extractedProseDense = extractedProseDir ? path.join(extractedProseDir, 'dense_vectors_uint8.json') : null; + const extractedProseHnswMeta = extractedProseDir ? path.join(extractedProseDir, 'dense_vectors_hnsw.meta.json') : null; + const extractedProseHnswIndex = extractedProseDir ? path.join(extractedProseDir, 'dense_vectors_hnsw.bin') : null; + + if (useSqlite) { + const codeDir = resolveIndexDir(root, 'code', userConfig); + const proseDir = resolveIndexDir(root, 'prose', userConfig); + const codeRelations = path.join(codeDir, 'file_relations.json'); + const proseRelations = path.join(proseDir, 'file_relations.json'); + const recordDir = runRecords ? resolveIndexDir(root, 'records', userConfig) : null; + const recordMeta = recordDir ? path.join(recordDir, 'chunk_meta.json') : null; + const recordDense = recordDir ? path.join(recordDir, 'dense_vectors_uint8.json') : null; + return { + backend: backendLabel, + code: fileSignature(sqliteCodePath), + prose: fileSignature(sqliteProsePath), + codeRelations: fileSignature(codeRelations), + proseRelations: fileSignature(proseRelations), + extractedProse: extractedProseMeta ? fileSignature(extractedProseMeta) : null, + extractedProseDense: extractedProseDense ? fileSignature(extractedProseDense) : null, + extractedProseHnswMeta: extractedProseHnswMeta ? fileSignature(extractedProseHnswMeta) : null, + extractedProseHnswIndex: extractedProseHnswIndex ? fileSignature(extractedProseHnswIndex) : null, + records: recordMeta ? fileSignature(recordMeta) : null, + recordsDense: recordDense ? fileSignature(recordDense) : null + }; + } + + const codeDir = resolveIndexDir(root, 'code', userConfig); + const proseDir = resolveIndexDir(root, 'prose', userConfig); + const codeMeta = path.join(codeDir, 'chunk_meta.json'); + const proseMeta = path.join(proseDir, 'chunk_meta.json'); + const codeDense = path.join(codeDir, 'dense_vectors_uint8.json'); + const proseDense = path.join(proseDir, 'dense_vectors_uint8.json'); + const codeHnswMeta = path.join(codeDir, 'dense_vectors_hnsw.meta.json'); + const codeHnswIndex = path.join(codeDir, 'dense_vectors_hnsw.bin'); + const proseHnswMeta = path.join(proseDir, 'dense_vectors_hnsw.meta.json'); + const proseHnswIndex = path.join(proseDir, 'dense_vectors_hnsw.bin'); + const codeRelations = path.join(codeDir, 'file_relations.json'); + const proseRelations = path.join(proseDir, 'file_relations.json'); + const recordDir = runRecords ? resolveIndexDir(root, 'records', userConfig) : null; + const recordMeta = recordDir ? path.join(recordDir, 'chunk_meta.json') : null; + const recordDense = recordDir ? path.join(recordDir, 'dense_vectors_uint8.json') : null; + const recordHnswMeta = recordDir ? path.join(recordDir, 'dense_vectors_hnsw.meta.json') : null; + const recordHnswIndex = recordDir ? path.join(recordDir, 'dense_vectors_hnsw.bin') : null; + return { + backend: backendLabel, + code: fileSignature(codeMeta), + prose: fileSignature(proseMeta), + codeDense: fileSignature(codeDense), + proseDense: fileSignature(proseDense), + codeHnswMeta: fileSignature(codeHnswMeta), + codeHnswIndex: fileSignature(codeHnswIndex), + proseHnswMeta: fileSignature(proseHnswMeta), + proseHnswIndex: fileSignature(proseHnswIndex), + codeRelations: fileSignature(codeRelations), + proseRelations: fileSignature(proseRelations), + extractedProse: extractedProseMeta ? 
fileSignature(extractedProseMeta) : null, + extractedProseDense: extractedProseDense ? fileSignature(extractedProseDense) : null, + extractedProseHnswMeta: extractedProseHnswMeta ? fileSignature(extractedProseHnswMeta) : null, + extractedProseHnswIndex: extractedProseHnswIndex ? fileSignature(extractedProseHnswIndex) : null, + records: recordMeta ? fileSignature(recordMeta) : null, + recordsDense: recordDense ? fileSignature(recordDense) : null, + recordsHnswMeta: recordHnswMeta ? fileSignature(recordHnswMeta) : null, + recordsHnswIndex: recordHnswIndex ? fileSignature(recordHnswIndex) : null + }; +} diff --git a/src/retrieval/cli-lmdb.js b/src/retrieval/cli-lmdb.js new file mode 100644 index 000000000..010e9f89d --- /dev/null +++ b/src/retrieval/cli-lmdb.js @@ -0,0 +1,104 @@ +import fsSync from 'node:fs'; +import path from 'node:path'; +import { Unpackr } from 'msgpackr'; +import { LMDB_META_KEYS, LMDB_SCHEMA_VERSION } from '../storage/lmdb/schema.js'; + +let open = null; +try { + ({ open } = await import('lmdb')); +} catch {} + +const unpackr = new Unpackr(); +const decode = (value) => (value == null ? null : unpackr.unpack(value)); + +const isStorePresent = (storePath) => { + if (!storePath || !fsSync.existsSync(storePath)) return false; + return fsSync.existsSync(path.join(storePath, 'data.mdb')); +}; + +const validateStore = (db, label) => { + const version = decode(db.get(LMDB_META_KEYS.schemaVersion)); + if (version !== LMDB_SCHEMA_VERSION) { + return { ok: false, reason: `lmdb schema mismatch (expected ${LMDB_SCHEMA_VERSION}, got ${version ?? 'missing'})` }; + } + const mode = decode(db.get(LMDB_META_KEYS.mode)); + if (mode && mode !== label) { + return { ok: false, reason: `lmdb mode mismatch (expected ${label}, got ${mode})` }; + } + return { ok: true }; +}; + +export async function createLmdbBackend(options) { + const { + useLmdb: useLmdbInput, + needsCode, + needsProse, + lmdbCodePath, + lmdbProsePath, + backendForcedLmdb, + lmdbStates + } = options; + let useLmdb = useLmdbInput; + let dbCode = null; + let dbProse = null; + + if (!useLmdb) { + return { useLmdb, dbCode, dbProse, isAvailable: false }; + } + + if (!open) { + const message = 'lmdb is required for the LMDB backend. 
+export async function createLmdbBackend(options) {
+  const {
+    useLmdb: useLmdbInput,
+    needsCode,
+    needsProse,
+    lmdbCodePath,
+    lmdbProsePath,
+    backendForcedLmdb,
+    lmdbStates
+  } = options;
+  let useLmdb = useLmdbInput;
+  let dbCode = null;
+  let dbProse = null;
+
+  if (!useLmdb) {
+    return { useLmdb, dbCode, dbProse, isAvailable: false };
+  }
+
+  if (!open) {
+    const message = 'lmdb is required for the LMDB backend. Run npm install first.';
+    if (backendForcedLmdb) {
+      throw new Error(message);
+    }
+    console.warn(message);
+    useLmdb = false;
+    return { useLmdb, dbCode, dbProse, isAvailable: false };
+  }
+
+  const isLmdbReady = (mode) => {
+    const state = lmdbStates?.[mode] || null;
+    const lmdbState = state?.lmdb || null;
+    if (!lmdbState) return true;
+    return lmdbState.ready !== false && lmdbState.pending !== true;
+  };
+  const pendingModes = [];
+  if (needsCode && !isLmdbReady('code')) pendingModes.push('code');
+  if (needsProse && !isLmdbReady('prose')) pendingModes.push('prose');
+  if (pendingModes.length) {
+    const message = `LMDB ${pendingModes.join(', ')} index marked pending; falling back to file-backed indexes.`;
+    if (backendForcedLmdb) {
+      throw new Error(message);
+    }
+    console.warn(message);
+    useLmdb = false;
+    return { useLmdb, dbCode, dbProse, isAvailable: false };
+  }
+
+  const openStore = (storePath, label) => {
+    if (!isStorePresent(storePath)) return null;
+    const db = open({ path: storePath, readOnly: true });
+    const validation = validateStore(db, label);
+    if (!validation.ok) {
+      db.close();
+      if (backendForcedLmdb) {
+        throw new Error(`LMDB ${label} invalid: ${validation.reason}`);
+      }
+      console.warn(`LMDB ${label} invalid: ${validation.reason}`);
+      return null;
+    }
+    return db;
+  };
+
+  if (needsCode) dbCode = openStore(lmdbCodePath, 'code');
+  if (needsProse) dbProse = openStore(lmdbProsePath, 'prose');
+  if ((needsCode && !dbCode) || (needsProse && !dbProse)) {
+    if (dbCode) dbCode.close();
+    if (dbProse) dbProse.close();
+    dbCode = null;
+    dbProse = null;
+    useLmdb = false;
+  }
+
+  return { useLmdb, dbCode, dbProse, isAvailable: Boolean(dbCode || dbProse) };
+}
diff --git a/src/retrieval/cli-sqlite.js b/src/retrieval/cli-sqlite.js
new file mode 100644
index 000000000..2806e43ef
--- /dev/null
+++ b/src/retrieval/cli-sqlite.js
@@ -0,0 +1,193 @@
+import { hasVectorTable, loadVectorExtension, resolveVectorExtensionPath } from '../../tools/vector-extension.js';
+
+import { parseEnvBool } from '../shared/env.js';
+
+/**
+ * Initialize SQLite connections for search.
+ * @param {object} options
+ * @returns {Promise<{useSqlite:boolean,dbCode:(object|null),dbProse:(object|null),vectorAnnState:object,vectorAnnUsed:object}>}
+ */
+export async function createSqliteBackend(options) {
+  const {
+    useSqlite: useSqliteInput,
+    needsCode,
+    needsProse,
+    sqliteCodePath,
+    sqliteProsePath,
+    sqliteFtsRequested,
+    backendForcedSqlite,
+    vectorExtension,
+    vectorAnnEnabled,
+    dbCache,
+    sqliteStates
+  } = options;
+
+  let useSqlite = useSqliteInput;
+  let dbCode = null;
+  let dbProse = null;
+  const vectorAnnState = {
+    code: { available: false },
+    prose: { available: false },
+    records: { available: false }
+  };
+  const vectorAnnUsed = { code: false, prose: false, records: false };
+
+  if (!useSqlite) {
+    return { useSqlite, dbCode, dbProse, vectorAnnState, vectorAnnUsed };
+  }
+
+  const isSqliteReady = (mode) => {
+    const state = sqliteStates?.[mode] || null;
+    const sqliteState = state?.sqlite || null;
+    if (!sqliteState) return true;
+    return sqliteState.ready !== false && sqliteState.pending !== true;
+  };
+  const pendingModes = [];
+  if (needsCode && !isSqliteReady('code')) pendingModes.push('code');
+  if (needsProse && !isSqliteReady('prose')) pendingModes.push('prose');
+  if (pendingModes.length) {
+    const message = `SQLite ${pendingModes.join(', ')} index marked pending; falling back to file-backed indexes.`;
+    if (backendForcedSqlite) {
+      throw new Error(message);
+    }
+    console.warn(message);
+    useSqlite = false;
+    return { useSqlite, dbCode, dbProse, vectorAnnState, vectorAnnUsed };
+  }
+
+  const sqliteDisabled = parseEnvBool(process.env.PAIROFCLEATS_SQLITE_DISABLED) === true;
+  if (sqliteDisabled) {
+    const message = 'better-sqlite3 is required for the SQLite backend. Run npm install first.';
+    if (backendForcedSqlite) {
+      throw new Error(message);
+    }
+    console.warn(message);
+    useSqlite = false;
+    return { useSqlite, dbCode, dbProse, vectorAnnState, vectorAnnUsed };
+  }
+
+  let Database;
+  try {
+    ({ default: Database } = await import('better-sqlite3'));
+  } catch {
+    const message = 'better-sqlite3 is required for the SQLite backend. Run npm install first.';
+    if (backendForcedSqlite) {
+      throw new Error(message);
+    }
+    console.warn(message);
+    useSqlite = false;
+    return { useSqlite, dbCode, dbProse, vectorAnnState, vectorAnnUsed };
+  }
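+
+  // FTS mode needs only the chunk/FTS/dense tables; sparse mode additionally
+  // requires the token, phrase, and chargram posting tables produced by
+  // `npm run build-sqlite-index`.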
+  const requiredTables = sqliteFtsRequested
+    ? [
+      'chunks',
+      'chunks_fts',
+      'minhash_signatures',
+      'dense_vectors',
+      'dense_meta'
+    ]
+    : [
+      'chunks',
+      'token_vocab',
+      'token_postings',
+      'doc_lengths',
+      'token_stats',
+      'phrase_vocab',
+      'phrase_postings',
+      'chargram_vocab',
+      'chargram_postings',
+      'minhash_signatures',
+      'dense_vectors',
+      'dense_meta'
+    ];
+
+  const openSqlite = (dbPath, label) => {
+    const cached = dbCache?.get?.(dbPath);
+    if (cached) return cached;
+    const db = new Database(dbPath, { readonly: true });
+    const tableRows = db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all();
+    const tableNames = new Set(tableRows.map((row) => row.name));
+    const missing = requiredTables.filter((name) => !tableNames.has(name));
+    if (missing.length) {
+      const message = `SQLite index ${label} is missing required tables (${missing.join(', ')}). Rebuild with npm run build-sqlite-index.`;
+      if (backendForcedSqlite) {
+        throw new Error(message);
+      }
+      console.warn(`${message} Falling back to file-backed indexes.`);
+      db.close();
+      return null;
+    }
+    if (dbCache?.set) dbCache.set(dbPath, db);
+    return db;
+  };
+
+  let vectorAnnWarned = false;
+  const initVectorAnn = (db, mode) => {
+    if (!vectorAnnEnabled || !db) return;
+    const loadResult = loadVectorExtension(db, vectorExtension, `sqlite ${mode}`);
+    if (!loadResult.ok) {
+      if (!vectorAnnWarned) {
+        const extPath = resolveVectorExtensionPath(vectorExtension);
+        console.warn(`[ann] SQLite vector extension unavailable (${loadResult.reason}).`);
+        console.warn(`[ann] Expected extension at ${extPath || 'unset'}; falling back to JS ANN.`);
+        vectorAnnWarned = true;
+      }
+      return;
+    }
+    if (!hasVectorTable(db, vectorExtension.table)) {
+      if (!vectorAnnWarned) {
+        console.warn(`[ann] SQLite vector table missing (${vectorExtension.table}). Rebuild with npm run build-sqlite-index.`);
+        vectorAnnWarned = true;
+      }
+      return;
+    }
+    vectorAnnState[mode].available = true;
+  };
+
+  if (needsCode) dbCode = openSqlite(sqliteCodePath, 'code');
+  if (needsProse) dbProse = openSqlite(sqliteProsePath, 'prose');
+  if (needsCode) initVectorAnn(dbCode, 'code');
+  if (needsProse) initVectorAnn(dbProse, 'prose');
+  if ((needsCode && !dbCode) || (needsProse && !dbProse)) {
+    if (dbCode) dbCache?.close ? dbCache.close(sqliteCodePath) : dbCode.close();
+    if (dbProse) dbCache?.close ? dbCache.close(sqliteProsePath) : dbProse.close();
+    dbCode = null;
+    dbProse = null;
+    useSqlite = false;
+  }
+
+  return { useSqlite, dbCode, dbProse, vectorAnnState, vectorAnnUsed };
+}
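+
+// Usage sketch (path is illustrative): the auto-backend policy compares this
+// count against search.sqliteAutoChunkThreshold when picking a backend.
+//   const chunks = await getSqliteChunkCount('/repo/.cache/index-code.db', 'code');
+//   // null means better-sqlite3 is disabled/unavailable or the query failed.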
+
+/**
+ * Probe SQLite chunk counts for auto-backend selection.
+ * @param {string} dbPath
+ * @param {'code'|'prose'} mode
+ * @returns {Promise<number|null>}
+ */
+export async function getSqliteChunkCount(dbPath, mode) {
+  if (parseEnvBool(process.env.PAIROFCLEATS_SQLITE_DISABLED) === true) {
+    return null;
+  }
+  let Database;
+  try {
+    ({ default: Database } = await import('better-sqlite3'));
+  } catch {
+    return null;
+  }
+  let db;
+  try {
+    db = new Database(dbPath, { readonly: true });
+    const row = db.prepare('SELECT COUNT(*) as count FROM chunks WHERE mode = ?').get(mode);
+    return typeof row?.count === 'number' ? row.count : null;
+  } catch {
+    return null;
+  } finally {
+    if (db) {
+      try {
+        db.close();
+      } catch {}
+    }
+  }
+}
diff --git a/src/retrieval/cli.js b/src/retrieval/cli.js
new file mode 100644
index 000000000..c2738c58d
--- /dev/null
+++ b/src/retrieval/cli.js
@@ -0,0 +1,617 @@
+import fsSync from 'node:fs';
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import {
+  applyAdaptiveDictConfig,
+  DEFAULT_MODEL_ID,
+  getCacheRuntimeConfig,
+  getDictConfig,
+  getMetricsDir,
+  getModelConfig,
+  loadUserConfig,
+  resolveRepoRoot,
+  resolveLmdbPaths,
+  resolveSqlitePaths
+} from '../../tools/dict-utils.js';
+import { queryVectorAnn } from '../../tools/vector-extension.js';
+import { getEnvConfig } from '../shared/env.js';
+import { createError, ERROR_CODES, isErrorCode } from '../shared/error-codes.js';
+import { getSearchUsage, parseSearchArgs } from './cli-args.js';
+import { loadDictionary } from './cli-dictionary.js';
+import { resolveIndexDir } from './cli-index.js';
+import { configureOutputCaches } from './output.js';
+import { createSearchTelemetry } from './cli/telemetry.js';
+import { getMissingFlagMessages, resolveIndexedFileCount } from './cli/options.js';
+import { hasLmdbStore } from './cli/index-loader.js';
+import { applyBranchFilter } from './cli/branch-filter.js';
+import { createBackendContext } from './cli/backend-context.js';
+import { color } from './cli/ansi.js';
+import { resolveBackendSelection } from './cli/policy.js';
+import { normalizeSearchOptions } from './cli/normalize-options.js';
+import { buildQueryPlan } from './cli/query-plan.js';
+import { loadSearchIndexes } from './cli/load-indexes.js';
+import { runSearchSession } from './cli/run-search-session.js';
+import { renderSearchOutput } from './cli/render.js';
+import { recordSearchArtifacts } from './cli/persist.js';
+
+const loadIndexState = (rootDir, userConfig, mode) => {
+  try {
+    const dir = resolveIndexDir(rootDir, mode, userConfig);
+    const statePath = path.join(dir, 'index_state.json');
+    if (!fsSync.existsSync(statePath)) return null;
+    return JSON.parse(fsSync.readFileSync(statePath, 'utf8'));
+  } catch {
+    return null;
+  }
+};
+
+const isSqliteReady = (state) => {
+  if (!state?.sqlite) return true;
+  return state.sqlite.ready !== false && state.sqlite.pending !== true;
+};
+
+const isLmdbReady = (state) => {
+  if (!state?.lmdb) return true;
+  return state.lmdb.ready !== false && state.lmdb.pending !== true;
+};
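+
+// Programmatic usage sketch (the options shown are inferred from this
+// function's signature, not a documented stable API):
+//   const payload = await runSearchCli(['auth middleware', '--json'], {
+//     emitOutput: false, exitOnError: false, root: '/path/to/repo'
+//   });
+// With emitOutput/exitOnError disabled, failures surface as thrown
+// error-code objects instead of console output and process.exit.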
+export async function runSearchCli(rawArgs = process.argv.slice(2), options = {}) {
+  const telemetry = createSearchTelemetry();
+  const recordSearchMetrics = (status) => telemetry.record(status);
+  const emitOutput = options.emitOutput !== false;
+  const exitOnError = options.exitOnError !== false;
+  const indexCache = options.indexCache || null;
+  const sqliteCache = options.sqliteCache || null;
+  const t0 = Date.now();
+
+  const inferJsonOutputFromArgs = () => {
+    if (!Array.isArray(rawArgs)) return { jsonOutput: false, jsonCompact: false };
+    const hasFlag = (name) =>
+      rawArgs.some((arg) => typeof arg === 'string' && (arg === name || arg.startsWith(`${name}=`)));
+    const jsonCompact = hasFlag('--json-compact');
+    const jsonOutput = hasFlag('--json') || jsonCompact;
+    return { jsonOutput, jsonCompact };
+  };
+
+  let argv;
+  try {
+    argv = parseSearchArgs(rawArgs);
+  } catch (err) {
+    recordSearchMetrics('error');
+    const { jsonOutput } = inferJsonOutputFromArgs();
+    const message = err && typeof err.message === 'string' && err.message.trim()
+      ? err.message
+      : 'Invalid arguments.';
+
+    if (emitOutput) {
+      if (jsonOutput) {
+        console.log(JSON.stringify({ ok: false, code: ERROR_CODES.INVALID_REQUEST, message }, null, 2));
+      } else {
+        console.error(message);
+      }
+    }
+
+    if (exitOnError) process.exit(1);
+
+    const error = createError(ERROR_CODES.INVALID_REQUEST, message);
+    error.emitted = true;
+    error.cause = err;
+    throw error;
+  }
+
+  const jsonCompact = argv['json-compact'] === true;
+  const jsonOutput = argv.json || jsonCompact;
+  const rootOverride = options.root ? path.resolve(options.root) : null;
+  const rootArg = rootOverride || (argv.repo ? path.resolve(argv.repo) : null);
+  const rootDir = rootArg || resolveRepoRoot(process.cwd());
+  const userConfig = loadUserConfig(rootDir);
+  const cacheConfig = getCacheRuntimeConfig(rootDir, userConfig);
+  const envConfig = getEnvConfig();
+  const verboseCache = envConfig.verbose === true;
+  const cacheLog = verboseCache ? (msg) => process.stderr.write(`\n${msg}\n`) : null;
+
+  configureOutputCaches({ cacheConfig, verbose: verboseCache, log: cacheLog });
+
+  const emitError = (message, errorCode) => {
+    if (!emitOutput || !message) return;
+    if (jsonOutput) {
+      console.log(JSON.stringify({ ok: false, code: errorCode, message }, null, 2));
+    } else {
+      console.error(message);
+    }
+  };
+  const bail = (message, code = 1, errorCode = ERROR_CODES.INTERNAL) => {
+    const resolvedCode = isErrorCode(errorCode) ? errorCode : ERROR_CODES.INTERNAL;
+    emitError(message, resolvedCode);
+    if (exitOnError) process.exit(code);
+    recordSearchMetrics('error');
+    const error = createError(resolvedCode, message || 'Search failed.');
+    error.emitted = true;
+    throw error;
+  };
+
+  try {
+    const missingValueMessages = getMissingFlagMessages(argv, rawArgs);
+    if (missingValueMessages.length) {
+      return bail(missingValueMessages.join('\n'), 1, ERROR_CODES.INVALID_REQUEST);
+    }
+
+    const metricsDir = getMetricsDir(rootDir, userConfig);
+    let normalized;
+    try {
+      normalized = normalizeSearchOptions({
+        argv,
+        rawArgs,
+        rootDir,
+        userConfig,
+        envConfig,
+        metricsDir
+      });
+    } catch (err) {
+      return bail(err.message, 1, ERROR_CODES.INVALID_REQUEST);
+    }
+
+    if (normalized.missingValueMessages.length) {
+      return bail(normalized.missingValueMessages.join('\n'), 1, ERROR_CODES.INVALID_REQUEST);
+    }
+
+    const {
+      query,
+      searchType,
+      searchAuthor,
+      searchImport,
+      chunkAuthorFilter,
+      searchMode,
+      runCode,
+      runProse,
+      runRecords,
+      runExtractedProse: runExtractedProseRaw,
+      embeddingProvider,
+      embeddingOnnx,
+      hnswConfig,
+      sqliteConfig,
+      sqliteAutoChunkThreshold,
+      sqliteAutoArtifactBytes,
+      postingsConfig,
+      filePrefilterEnabled,
+      searchRegexConfig,
+      fileChargramN,
+      vectorExtension,
+      bm25K1,
+      bm25B,
+      branchesMin,
+      loopsMin,
+      breaksMin,
+      continuesMin,
+      churnMin,
+      modifiedAfter,
+      modifiedSinceDays,
+      fileFilter,
+      caseFile,
+      caseTokens,
+      branchFilter,
+      extFilter,
+      metaFilters,
+      annEnabled,
+      scoreBlendEnabled,
+      scoreBlendSparseWeight,
+      scoreBlendAnnWeight,
+      symbolBoostEnabled,
+      symbolBoostDefinitionWeight,
+      symbolBoostExportWeight,
+      minhashMaxDocs,
+      queryCacheEnabled,
+      queryCacheMaxEntries,
+      queryCacheTtlMs,
+      rrfEnabled,
+      rrfK,
+      contextExpansionEnabled,
+      contextExpansionOptions,
+      contextExpansionRespectFilters,
+      sqliteFtsNormalize,
+      sqliteFtsProfile,
+      sqliteFtsWeights,
+      fieldWeightsConfig,
+      explain,
+      denseVectorMode,
+      backendArg
+    } = normalized;
+
+    if (!query) {
+      return bail(getSearchUsage(), 1, ERROR_CODES.INVALID_REQUEST);
+    }
+
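+    // Tag telemetry with mode and ANN state before any backend work starts.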
+    telemetry.setMode(searchMode);
+    telemetry.setAnn(annEnabled ? 'on' : 'off');
+
+    const modelConfig = getModelConfig(rootDir, userConfig);
+    const modelIdDefault = argv.model || modelConfig.id || DEFAULT_MODEL_ID;
+    const useStubEmbeddings = envConfig.embeddings === 'stub';
+    const topN = argv.n;
+    const showStats = argv.stats === true;
+    const showMatched = argv.matched === true;
+
+    const needsCode = runCode;
+    const needsProse = runProse;
+    const needsSqlite = runCode || runProse;
+    const vectorAnnEnabled = annEnabled && vectorExtension.enabled;
+    const sqliteScoreModeConfig = sqliteConfig.scoreMode === 'fts';
+    const sqliteConfigured = sqliteConfig.use !== false;
+    const lmdbConfigured = userConfig.lmdb?.use !== false;
+
+    const lmdbPaths = resolveLmdbPaths(rootDir, userConfig);
+    const lmdbCodePath = lmdbPaths.codePath;
+    const lmdbProsePath = lmdbPaths.prosePath;
+    const sqlitePaths = resolveSqlitePaths(rootDir, userConfig);
+    const sqliteCodePath = sqlitePaths.codePath;
+    const sqliteProsePath = sqlitePaths.prosePath;
+
+    const sqliteStateCode = needsCode ? loadIndexState(rootDir, userConfig, 'code') : null;
+    const sqliteStateProse = needsProse ? loadIndexState(rootDir, userConfig, 'prose') : null;
+    const sqliteCodeAvailable = fsSync.existsSync(sqliteCodePath) && isSqliteReady(sqliteStateCode);
+    const sqliteProseAvailable = fsSync.existsSync(sqliteProsePath) && isSqliteReady(sqliteStateProse);
+    const sqliteAvailable = (!needsCode || sqliteCodeAvailable) && (!needsProse || sqliteProseAvailable);
+    const lmdbStateCode = sqliteStateCode;
+    const lmdbStateProse = sqliteStateProse;
+    const lmdbCodeAvailable = hasLmdbStore(lmdbCodePath) && isLmdbReady(lmdbStateCode);
+    const lmdbProseAvailable = hasLmdbStore(lmdbProsePath) && isLmdbReady(lmdbStateProse);
+    const lmdbAvailable = (!needsCode || lmdbCodeAvailable) && (!needsProse || lmdbProseAvailable);
+
+    const backendSelection = await resolveBackendSelection({
+      backendArg,
+      sqliteScoreModeConfig,
+      sqliteConfigured,
+      sqliteAvailable,
+      sqliteCodeAvailable,
+      sqliteProseAvailable,
+      sqliteCodePath,
+      sqliteProsePath,
+      lmdbConfigured,
+      lmdbAvailable,
+      lmdbCodeAvailable,
+      lmdbProseAvailable,
+      lmdbCodePath,
+      lmdbProsePath,
+      sqliteAutoChunkThreshold,
+      sqliteAutoArtifactBytes,
+      needsSqlite,
+      needsCode,
+      needsProse,
+      root: rootDir,
+      userConfig,
+      onWarn: console.warn
+    });
+    if (backendSelection.error) {
+      return bail(backendSelection.error.message);
+    }
+
+    const {
+      backendPolicy,
+      useSqlite: useSqliteSelection,
+      useLmdb: useLmdbSelection,
+      sqliteFtsRequested,
+      backendForcedSqlite,
+      backendForcedLmdb
+    } = backendSelection;
+
+    const backendContext = await createBackendContext({
+      backendPolicy,
+      useSqlite: useSqliteSelection,
+      useLmdb: useLmdbSelection,
+      needsCode,
+      needsProse,
+      sqliteCodePath,
+      sqliteProsePath,
+      sqliteFtsRequested,
+      backendForcedSqlite,
+      backendForcedLmdb,
+      vectorExtension,
+      vectorAnnEnabled,
+      dbCache: sqliteCache,
+      sqliteStates: {
+        code: sqliteStateCode,
+        prose: sqliteStateProse
+      },
+      lmdbCodePath,
+      lmdbProsePath,
+      lmdbStates: {
+        code: lmdbStateCode,
+        prose: lmdbStateProse
+      },
+      postingsConfig,
+      sqliteFtsWeights,
+      queryVectorAnn,
+      modelIdDefault,
+      fileChargramN,
+      hnswConfig,
+      root: rootDir,
+      userConfig
+    });
+
+    const {
+      useSqlite,
+      useLmdb,
+      backendLabel,
+      backendPolicyInfo,
+      vectorAnnState,
+      vectorAnnUsed,
+      sqliteHelpers,
+      lmdbHelpers
+    } = backendContext;
+    telemetry.setBackend(backendLabel);
+
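+    // A --branch filter that misses the current branch short-circuits the
+    // search with an empty payload (see branch-filter.js below).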
+    const branchResult = await applyBranchFilter({
+      branchFilter,
+      caseSensitive: caseFile,
+      root: rootDir,
+      metricsDir,
+      runCode,
+      runProse,
+      backendLabel,
+      backendPolicy: backendPolicyInfo,
+      emitOutput,
+      jsonOutput,
+      recordSearchMetrics,
+      warn: console.warn
+    });
+    if (branchResult?.payload) {
+      return branchResult.payload;
+    }
+
+    const dictConfigBase = getDictConfig(rootDir, userConfig);
+    const dictConfig = applyAdaptiveDictConfig(
+      dictConfigBase,
+      resolveIndexedFileCount(metricsDir, { runCode, runProse, runExtractedProse: runExtractedProseRaw })
+    );
+    const { dict } = await loadDictionary(rootDir, dictConfig);
+
+    const queryPlan = buildQueryPlan({
+      query,
+      argv,
+      dict,
+      dictConfig,
+      postingsConfig,
+      caseTokens,
+      fileFilter,
+      caseFile,
+      searchRegexConfig,
+      filePrefilterEnabled,
+      fileChargramN,
+      searchType,
+      searchAuthor,
+      searchImport,
+      chunkAuthorFilter,
+      branchesMin,
+      loopsMin,
+      breaksMin,
+      continuesMin,
+      churnMin,
+      extFilter,
+      metaFilters,
+      modifiedAfter,
+      modifiedSinceDays,
+      fieldWeightsConfig,
+      denseVectorMode,
+      branchFilter
+    });
+
+    const annActive = annEnabled && queryPlan.queryTokens.length > 0;
+
+    const {
+      loadIndexFromSqlite,
+      buildCandidateSetSqlite,
+      getTokenIndexForQuery,
+      rankSqliteFts,
+      rankVectorAnnSqlite
+    } = sqliteHelpers;
+    const { loadIndexFromLmdb } = lmdbHelpers;
+
+    const {
+      idxProse,
+      idxExtractedProse,
+      idxCode,
+      idxRecords,
+      runExtractedProse,
+      hnswAnnState,
+      hnswAnnUsed,
+      modelIdForCode,
+      modelIdForProse,
+      modelIdForExtractedProse,
+      modelIdForRecords
+    } = loadSearchIndexes({
+      rootDir,
+      userConfig,
+      searchMode,
+      runProse,
+      runExtractedProse: runExtractedProseRaw,
+      runCode,
+      runRecords,
+      useSqlite,
+      useLmdb,
+      emitOutput,
+      exitOnError,
+      annActive,
+      filtersActive: queryPlan.filtersActive,
+      contextExpansionEnabled,
+      sqliteFtsRequested,
+      indexCache,
+      modelIdDefault,
+      fileChargramN,
+      hnswConfig,
+      loadIndexFromSqlite,
+      loadIndexFromLmdb,
+      resolvedDenseVectorMode: queryPlan.resolvedDenseVectorMode
+    });
+
+    const modelIds = {
+      code: modelIdForCode,
+      prose: modelIdForProse,
+      extractedProse: modelIdForExtractedProse,
+      records: modelIdForRecords
+    };
+
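+    // The session runs sparse and (when active) ANN ranking per enabled mode
+    // and returns per-mode hits plus cache info.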
+    const searchResult = await runSearchSession({
+      rootDir,
+      userConfig,
+      metricsDir,
+      query,
+      searchMode,
+      runCode,
+      runProse,
+      runExtractedProse,
+      runRecords,
+      topN,
+      useSqlite,
+      annEnabled,
+      annActive,
+      vectorExtension,
+      vectorAnnEnabled,
+      vectorAnnState,
+      vectorAnnUsed,
+      hnswConfig,
+      hnswAnnState,
+      hnswAnnUsed,
+      sqliteFtsRequested,
+      sqliteFtsNormalize,
+      sqliteFtsProfile,
+      sqliteFtsWeights,
+      sqliteCodePath,
+      sqliteProsePath,
+      bm25K1,
+      bm25B,
+      fieldWeights: queryPlan.fieldWeights,
+      postingsConfig,
+      queryTokens: queryPlan.queryTokens,
+      phraseNgramSet: queryPlan.phraseNgramSet,
+      phraseRange: queryPlan.phraseRange,
+      symbolBoost: {
+        enabled: symbolBoostEnabled,
+        definitionWeight: symbolBoostDefinitionWeight,
+        exportWeight: symbolBoostExportWeight
+      },
+      filters: queryPlan.filters,
+      filtersActive: queryPlan.filtersActive,
+      scoreBlend: {
+        enabled: scoreBlendEnabled,
+        sparseWeight: scoreBlendSparseWeight,
+        annWeight: scoreBlendAnnWeight
+      },
+      rrf: {
+        enabled: rrfEnabled,
+        k: rrfK
+      },
+      minhashMaxDocs,
+      buildCandidateSetSqlite,
+      getTokenIndexForQuery,
+      rankSqliteFts,
+      rankVectorAnnSqlite,
+      idxProse,
+      idxExtractedProse,
+      idxCode,
+      idxRecords,
+      modelConfig,
+      modelIds,
+      embeddingProvider,
+      embeddingOnnx,
+      embeddingQueryText: queryPlan.embeddingQueryText,
+      useStubEmbeddings,
+      contextExpansionEnabled,
+      contextExpansionOptions,
+      contextExpansionRespectFilters,
+      cacheFilters: queryPlan.cacheFilters,
+      queryCacheEnabled,
+      queryCacheMaxEntries,
+      queryCacheTtlMs,
+      backendLabel,
+      resolvedDenseVectorMode: queryPlan.resolvedDenseVectorMode,
+      intentInfo: queryPlan.intentInfo
+    });
+
+    const elapsedMs = Date.now() - t0;
+
+    const payload = renderSearchOutput({
+      emitOutput,
+      jsonOutput,
+      jsonCompact,
+      explain,
+      color,
+      rootDir,
+      backendLabel,
+      backendPolicyInfo,
+      runCode,
+      runProse,
+      runExtractedProse,
+      runRecords,
+      topN,
+      queryTokens: queryPlan.queryTokens,
+      highlightRegex: queryPlan.highlightRegex,
+      contextExpansionEnabled,
+      expandedHits: {
+        prose: searchResult.proseExpanded,
+        extractedProse: searchResult.extractedProseExpanded,
+        code: searchResult.codeExpanded,
+        records: searchResult.recordExpanded
+      },
+      baseHits: {
+        proseHits: searchResult.proseHits,
+        extractedProseHits: searchResult.extractedProseHits,
+        codeHits: searchResult.codeHits,
+        recordHits: searchResult.recordHits
+      },
+      annEnabled,
+      annActive,
+      annBackend: searchResult.annBackend,
+      vectorExtension,
+      vectorAnnEnabled,
+      vectorAnnState,
+      vectorAnnUsed,
+      hnswConfig,
+      hnswAnnState,
+      modelIds,
+      embeddingProvider,
+      embeddingOnnx,
+      cacheInfo: searchResult.cache,
+      intentInfo: queryPlan.intentInfo,
+      resolvedDenseVectorMode: queryPlan.resolvedDenseVectorMode,
+      fieldWeights: queryPlan.fieldWeights,
+      contextExpansionStats: searchResult.contextExpansionStats,
+      idxProse,
+      idxCode,
+      idxRecords,
+      showStats,
+      showMatched,
+      verboseCache,
+      elapsedMs
+    });
+
+    await recordSearchArtifacts({
+      metricsDir,
+      query,
+      queryTokens: queryPlan.queryTokens,
+      proseHits: searchResult.proseHits,
+      codeHits: searchResult.codeHits,
+      recordHits: searchResult.recordHits,
+      elapsedMs,
+      cacheHit: searchResult.cache.hit
+    });
+
+    recordSearchMetrics('ok');
+    return payload;
+  } catch (err) {
+    recordSearchMetrics('error');
+    if (emitOutput && jsonOutput && !err?.emitted) {
+      const message = err?.message || 'Search failed.';
+      const code = isErrorCode(err?.code) ? err.code : ERROR_CODES.INTERNAL;
+      console.log(JSON.stringify({ ok: false, code, message }, null, 2));
+      if (err) err.emitted = true;
+    }
+    throw err;
+  }
+}
+
+if (process.argv[1] === fileURLToPath(import.meta.url)) {
+  runSearchCli().catch((err) => {
+    console.error(err?.message || err);
+    process.exit(1);
+  });
+}
diff --git a/src/retrieval/cli/ansi.js b/src/retrieval/cli/ansi.js
new file mode 100644
index 000000000..82328bf51
--- /dev/null
+++ b/src/retrieval/cli/ansi.js
@@ -0,0 +1,11 @@
+export const color = {
+  green: (text) => `\x1b[32m${text}\x1b[0m`,
+  yellow: (text) => `\x1b[33m${text}\x1b[0m`,
+  red: (text) => `\x1b[31m${text}\x1b[0m`,
+  cyan: (text) => `\x1b[36m${text}\x1b[0m`,
+  magenta: (text) => `\x1b[35m${text}\x1b[0m`,
+  blue: (text) => `\x1b[34m${text}\x1b[0m`,
+  gray: (text) => `\x1b[90m${text}\x1b[0m`,
+  bold: (text) => `\x1b[1m${text}\x1b[0m`,
+  underline: (text) => `\x1b[4m${text}\x1b[0m`
+};
diff --git a/src/retrieval/cli/backend-context.js b/src/retrieval/cli/backend-context.js
new file mode 100644
index 000000000..8d3a8c20f
--- /dev/null
+++ b/src/retrieval/cli/backend-context.js
@@ -0,0 +1,128 @@
+import { createLmdbBackend } from '../cli-lmdb.js';
+import { createSqliteBackend } from '../cli-sqlite.js';
+import { resolveIndexDir } from '../cli-index.js';
+import { createLmdbHelpers } from '../lmdb-helpers.js';
+import { createSqliteHelpers } from '../sqlite-helpers.js';
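+
+// Opens both candidate backends and prefers SQLite when it initializes,
+// giving the resolution order: sqlite (or sqlite-fts) -> lmdb -> memory
+// (file-backed artifacts). Helper factories receive mode-aware db getters.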
+
+export const createBackendContext = async ({
+  backendPolicy,
+  useSqlite: useSqliteInput,
+  useLmdb: useLmdbInput,
+  needsCode,
+  needsProse,
+  sqliteCodePath,
+  sqliteProsePath,
+  sqliteFtsRequested,
+  backendForcedSqlite,
+  backendForcedLmdb,
+  vectorExtension,
+  vectorAnnEnabled,
+  dbCache,
+  sqliteStates,
+  lmdbCodePath,
+  lmdbProsePath,
+  lmdbStates,
+  postingsConfig,
+  sqliteFtsWeights,
+  queryVectorAnn,
+  modelIdDefault,
+  fileChargramN,
+  hnswConfig,
+  root,
+  userConfig
+}) => {
+  const lmdbBackend = await createLmdbBackend({
+    useLmdb: useLmdbInput,
+    needsCode,
+    needsProse,
+    lmdbCodePath,
+    lmdbProsePath,
+    backendForcedLmdb,
+    lmdbStates
+  });
+  let useLmdb = lmdbBackend.useLmdb;
+
+  const sqliteBackend = await createSqliteBackend({
+    useSqlite: useSqliteInput,
+    needsCode,
+    needsProse,
+    sqliteCodePath,
+    sqliteProsePath,
+    sqliteFtsRequested,
+    backendForcedSqlite,
+    vectorExtension,
+    vectorAnnEnabled,
+    dbCache,
+    sqliteStates
+  });
+  let useSqlite = sqliteBackend.useSqlite;
+  let dbCode = sqliteBackend.dbCode;
+  let dbProse = sqliteBackend.dbProse;
+  let lmdbCode = lmdbBackend.dbCode;
+  let lmdbProse = lmdbBackend.dbProse;
+
+  if (useSqlite) {
+    useLmdb = false;
+    lmdbCode = null;
+    lmdbProse = null;
+  }
+
+  const vectorAnnState = sqliteBackend.vectorAnnState;
+  const vectorAnnUsed = sqliteBackend.vectorAnnUsed;
+  const backendLabel = useSqlite
+    ? (sqliteFtsRequested ? 'sqlite-fts' : 'sqlite')
+    : (useLmdb ? 'lmdb' : 'memory');
+  const backendPolicyInfo = backendPolicy ? { ...backendPolicy, backendLabel } : { backendLabel };
+
+  const getSqliteDb = (mode) => {
+    if (!useSqlite) return null;
+    if (mode === 'code') return dbCode;
+    if (mode === 'prose') return dbProse;
+    return null;
+  };
+
+  const getLmdbDb = (mode) => {
+    if (!useLmdb) return null;
+    if (mode === 'code') return lmdbCode;
+    if (mode === 'prose') return lmdbProse;
+    return null;
+  };
+
+  const sqliteHelpers = createSqliteHelpers({
+    getDb: getSqliteDb,
+    postingsConfig,
+    sqliteFtsWeights,
+    vectorExtension,
+    vectorAnnState,
+    queryVectorAnn,
+    modelIdDefault,
+    fileChargramN
+  });
+
+  const lmdbIndexDirs = {
+    code: resolveIndexDir(root, 'code', userConfig),
+    prose: resolveIndexDir(root, 'prose', userConfig)
+  };
+  const lmdbHelpers = createLmdbHelpers({
+    getDb: getLmdbDb,
+    hnswConfig,
+    modelIdDefault,
+    fileChargramN,
+    indexDirs: lmdbIndexDirs
+  });
+
+  return {
+    useSqlite,
+    useLmdb,
+    dbCode,
+    dbProse,
+    lmdbCode,
+    lmdbProse,
+    backendLabel,
+    backendPolicyInfo,
+    vectorAnnState,
+    vectorAnnUsed,
+    sqliteHelpers,
+    lmdbHelpers
+  };
+};
diff --git a/src/retrieval/cli/branch-filter.js b/src/retrieval/cli/branch-filter.js
new file mode 100644
index 000000000..06e643cae
--- /dev/null
+++ b/src/retrieval/cli/branch-filter.js
@@ -0,0 +1,70 @@
+import simpleGit from 'simple-git';
+import { loadBranchFromMetrics } from './options.js';
+
+export const resolveRepoBranch = async ({ root, metricsDir, runCode, runProse }) => {
+  const fromMetrics = runCode ? loadBranchFromMetrics(metricsDir, 'code') : null;
+  const fromProse = !fromMetrics && runProse ? loadBranchFromMetrics(metricsDir, 'prose') : null;
+  if (fromMetrics || fromProse) return fromMetrics || fromProse;
+  try {
+    const git = simpleGit(root);
+    const status = await git.status();
+    return status.current || null;
+  } catch {
+    return null;
+  }
+};
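+
+// Returns { matched, repoBranch, payload }; a non-null payload is the final
+// empty result set the CLI should emit when the branch filter misses. An
+// unresolvable branch only warns and lets the search proceed.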
+export const applyBranchFilter = async ({
+  branchFilter,
+  caseSensitive,
+  root,
+  metricsDir,
+  runCode,
+  runProse,
+  backendLabel,
+  backendPolicy,
+  emitOutput,
+  jsonOutput,
+  recordSearchMetrics,
+  warn = console.warn,
+  repoBranch: repoBranchInput,
+  resolveBranch
+} = {}) => {
+  if (!branchFilter) {
+    return { matched: true, repoBranch: null, payload: null };
+  }
+  const resolve = resolveBranch || resolveRepoBranch;
+  const repoBranch = repoBranchInput ?? await resolve({ root, metricsDir, runCode, runProse });
+  const normalizedBranch = caseSensitive ? branchFilter : branchFilter.toLowerCase();
+  const normalizedRepo = repoBranch ? (caseSensitive ? repoBranch : repoBranch.toLowerCase()) : null;
+  const branchMatches = normalizedRepo ? normalizedRepo === normalizedBranch : true;
+  if (repoBranch && !branchMatches) {
+    const payload = {
+      backend: backendLabel,
+      prose: [],
+      code: [],
+      records: [],
+      stats: {
+        branch: repoBranch,
+        branchFilter,
+        branchMatch: false,
+        backendPolicy
+      }
+    };
+    if (emitOutput) {
+      if (jsonOutput) {
+        console.log(JSON.stringify(payload, null, 2));
+      } else {
+        console.log(`Branch filter ${branchFilter} did not match current branch ${repoBranch}; returning no results.`);
+      }
+    }
+    if (recordSearchMetrics) {
+      recordSearchMetrics('ok');
+    }
+    return { matched: false, repoBranch, payload };
+  }
+  if (!repoBranch && warn) {
+    warn('Branch filter requested but repo branch is unavailable; continuing without branch validation.');
+  }
+  return { matched: true, repoBranch, payload: null };
+};
diff --git a/src/retrieval/cli/highlight.js b/src/retrieval/cli/highlight.js
new file mode 100644
index 000000000..ce45e7486
--- /dev/null
+++ b/src/retrieval/cli/highlight.js
@@ -0,0 +1,12 @@
+const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+
+export function buildHighlightRegex(queryTokens) {
+  const highlightTokens = [...new Set(queryTokens.map((tok) => tok.trim()).filter(Boolean))];
+  if (!highlightTokens.length) return null;
+  try {
+    const pattern = highlightTokens.map((tok) => escapeRegExp(tok)).join('|');
+    return pattern ? new RegExp(`(${pattern})`, 'ig') : null;
+  } catch {
+    return null;
+  }
+}
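+
+// Example: buildHighlightRegex(['foo', 'Bar']) -> /(foo|Bar)/ig; tokens are
+// trimmed, deduplicated, and regex-escaped, and an empty list yields null.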
diff --git a/src/retrieval/cli/index-loader.js b/src/retrieval/cli/index-loader.js
new file mode 100644
index 000000000..14eb67a4f
--- /dev/null
+++ b/src/retrieval/cli/index-loader.js
@@ -0,0 +1,98 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import { loadIndexWithCache } from '../index-cache.js';
+import { resolveIndexDir } from '../cli-index.js';
+
+export function hasLmdbStore(storePath) {
+  if (!storePath || !fs.existsSync(storePath)) return false;
+  return fs.existsSync(path.join(storePath, 'data.mdb'));
+}
+
+export function loadIndexCached({
+  indexCache,
+  dir,
+  modelIdDefault,
+  fileChargramN,
+  includeHnsw = true,
+  hnswConfig,
+  loadIndex
+}) {
+  return loadIndexWithCache(
+    indexCache,
+    dir,
+    {
+      modelIdDefault,
+      fileChargramN,
+      includeHnsw,
+      hnswConfig
+    },
+    loadIndex
+  );
+}
+
+export function hasIndexMeta(dir) {
+  if (!dir) return false;
+  const metaPath = path.join(dir, 'chunk_meta.json');
+  const metaJsonlPath = path.join(dir, 'chunk_meta.jsonl');
+  const metaPartsPath = path.join(dir, 'chunk_meta.meta.json');
+  const metaPartsDir = path.join(dir, 'chunk_meta.parts');
+  return fs.existsSync(metaPath)
+    || fs.existsSync(metaJsonlPath)
+    || fs.existsSync(metaPartsPath)
+    || fs.existsSync(metaPartsDir);
+}
+
+export function warnPendingState(idx, label, { emitOutput, useSqlite, annActive }) {
+  if (!emitOutput) return;
+  const state = idx?.state;
+  if (!state || useSqlite) return;
+  if (state.enrichment?.pending) {
+    console.warn(`[search] ${label} index enrichment pending (stage1).`);
+  }
+  if (annActive && state.embeddings?.enabled && state.embeddings.ready === false) {
+    console.warn(`[search] ${label} embeddings pending; ANN may be limited.`);
+  }
+}
+
+export function resolveDenseVector(idx, mode, denseVectorMode) {
+  if (!idx) return null;
+  if (denseVectorMode === 'code') return idx.denseVecCode || idx.denseVec || null;
+  if (denseVectorMode === 'doc') return idx.denseVecDoc || idx.denseVec || null;
+  if (denseVectorMode === 'auto') {
+    if (mode === 'code') return idx.denseVecCode || idx.denseVec || null;
+    if (mode === 'prose' || mode === 'extracted-prose') {
+      return idx.denseVecDoc || idx.denseVec || null;
+    }
+  }
+  return idx.denseVec || null;
+}
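+
+// Selection sketch: 'code' -> denseVecCode, 'doc' -> denseVecDoc, and 'auto'
+// picks by index mode; every branch falls back to the merged denseVec.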
+
+export function loadFileRelations(rootDir, userConfig, mode) {
+  try {
+    const dir = resolveIndexDir(rootDir, mode, userConfig);
+    const relPath = path.join(dir, 'file_relations.json');
+    if (!fs.existsSync(relPath)) return null;
+    const raw = JSON.parse(fs.readFileSync(relPath, 'utf8'));
+    if (!Array.isArray(raw)) return null;
+    const map = new Map();
+    for (const entry of raw) {
+      if (!entry?.file) continue;
+      map.set(entry.file, entry.relations || null);
+    }
+    return map;
+  } catch {
+    return null;
+  }
+}
+
+export function loadRepoMap(rootDir, userConfig, mode) {
+  try {
+    const dir = resolveIndexDir(rootDir, mode, userConfig);
+    const mapPath = path.join(dir, 'repo_map.json');
+    if (!fs.existsSync(mapPath)) return null;
+    const raw = JSON.parse(fs.readFileSync(mapPath, 'utf8'));
+    return Array.isArray(raw) ? raw : null;
+  } catch {
+    return null;
+  }
+}
diff --git a/src/retrieval/cli/load-indexes.js b/src/retrieval/cli/load-indexes.js
new file mode 100644
index 000000000..d98028e9f
--- /dev/null
+++ b/src/retrieval/cli/load-indexes.js
@@ -0,0 +1,189 @@
+import {
+  hasIndexMeta,
+  loadFileRelations,
+  loadIndexCached,
+  loadRepoMap,
+  resolveDenseVector,
+  warnPendingState
+} from './index-loader.js';
+import { loadIndex, requireIndexDir, resolveIndexDir } from '../cli-index.js';
+import { resolveModelIds } from './model-ids.js';
+
+const EMPTY_INDEX = { chunkMeta: [], denseVec: null, minhash: null };
+
+export function loadSearchIndexes({
+  rootDir,
+  userConfig,
+  searchMode,
+  runProse,
+  runExtractedProse,
+  runCode,
+  runRecords,
+  useSqlite,
+  useLmdb,
+  emitOutput,
+  exitOnError,
+  annActive,
+  filtersActive,
+  contextExpansionEnabled,
+  sqliteFtsRequested,
+  indexCache,
+  modelIdDefault,
+  fileChargramN,
+  hnswConfig,
+  loadIndexFromSqlite,
+  loadIndexFromLmdb,
+  resolvedDenseVectorMode
+}) {
+  const sqliteLazyChunks = sqliteFtsRequested && !filtersActive;
+  const sqliteContextChunks = contextExpansionEnabled ? true : !sqliteLazyChunks;
+
+  const proseDir = runProse && !useSqlite
+    ? requireIndexDir(rootDir, 'prose', userConfig, { emitOutput, exitOnError })
+    : null;
+  const codeDir = runCode && !useSqlite
+    ? requireIndexDir(rootDir, 'code', userConfig, { emitOutput, exitOnError })
+    : null;
+  const recordsDir = runRecords
+    ? requireIndexDir(rootDir, 'records', userConfig, { emitOutput, exitOnError })
+    : null;
+
+  const loadIndexCachedLocal = (dir, includeHnsw = true) => loadIndexCached({
+    indexCache,
+    dir,
+    modelIdDefault,
+    fileChargramN,
+    includeHnsw,
+    hnswConfig,
+    loadIndex
+  });
+
+  let extractedProseDir = null;
+  let resolvedRunExtractedProse = runExtractedProse;
+  if (resolvedRunExtractedProse) {
+    if (searchMode === 'extracted-prose') {
+      extractedProseDir = requireIndexDir(rootDir, 'extracted-prose', userConfig, { emitOutput, exitOnError });
+    } else {
+      extractedProseDir = resolveIndexDir(rootDir, 'extracted-prose', userConfig);
+      if (!hasIndexMeta(extractedProseDir)) {
+        resolvedRunExtractedProse = false;
+        if (emitOutput) {
+          console.warn('[search] extracted-prose index not found; skipping.');
+        }
+      }
+    }
+  }
+
+  const idxProse = runProse
+    ? (useSqlite ? loadIndexFromSqlite('prose', {
+      includeDense: annActive,
+      includeMinhash: annActive,
+      includeChunks: sqliteContextChunks,
+      includeFilterIndex: filtersActive
+    }) : (useLmdb ? loadIndexFromLmdb('prose', {
+      includeDense: annActive,
+      includeMinhash: annActive,
+      includeChunks: true,
+      includeFilterIndex: filtersActive
+    }) : loadIndexCachedLocal(proseDir, annActive)))
+    : { ...EMPTY_INDEX };
+  const idxExtractedProse = resolvedRunExtractedProse
+    ? loadIndexCachedLocal(extractedProseDir, annActive)
+    : { ...EMPTY_INDEX };
+  const idxCode = runCode
+    ? (useSqlite ? loadIndexFromSqlite('code', {
+      includeDense: annActive,
+      includeMinhash: annActive,
+      includeChunks: sqliteContextChunks,
+      includeFilterIndex: filtersActive
+    }) : (useLmdb ? loadIndexFromLmdb('code', {
+      includeDense: annActive,
+      includeMinhash: annActive,
+      includeChunks: true,
+      includeFilterIndex: filtersActive
+    }) : loadIndexCachedLocal(codeDir, annActive)))
+    : { ...EMPTY_INDEX };
+  const idxRecords = runRecords
+    ? loadIndexCachedLocal(recordsDir, annActive)
+    : { ...EMPTY_INDEX };
+
+  warnPendingState(idxCode, 'code', { emitOutput, useSqlite, annActive });
+  warnPendingState(idxProse, 'prose', { emitOutput, useSqlite, annActive });
+  warnPendingState(idxExtractedProse, 'extracted-prose', { emitOutput, useSqlite, annActive });
+
+  const hnswAnnState = {
+    code: { available: Boolean(idxCode?.hnsw?.available) },
+    prose: { available: Boolean(idxProse?.hnsw?.available) },
+    records: { available: Boolean(idxRecords?.hnsw?.available) },
+    'extracted-prose': { available: Boolean(idxExtractedProse?.hnsw?.available) }
+  };
+  const hnswAnnUsed = {
+    code: false,
+    prose: false,
+    records: false,
+    'extracted-prose': false
+  };
+
+  if (runCode) {
+    idxCode.denseVec = resolveDenseVector(idxCode, 'code', resolvedDenseVectorMode);
+    if ((useSqlite || useLmdb) && !idxCode.fileRelations) {
+      idxCode.fileRelations = loadFileRelations(rootDir, userConfig, 'code');
+    }
+    if ((useSqlite || useLmdb) && !idxCode.repoMap) {
+      idxCode.repoMap = loadRepoMap(rootDir, userConfig, 'code');
+    }
+  }
+  if (runProse) {
+    idxProse.denseVec = resolveDenseVector(idxProse, 'prose', resolvedDenseVectorMode);
+    if ((useSqlite || useLmdb) && !idxProse.fileRelations) {
+      idxProse.fileRelations = loadFileRelations(rootDir, userConfig, 'prose');
+    }
+    if ((useSqlite || useLmdb) && !idxProse.repoMap) {
+      idxProse.repoMap = loadRepoMap(rootDir, userConfig, 'prose');
+    }
+  }
+  if (resolvedRunExtractedProse) {
+    idxExtractedProse.denseVec = resolveDenseVector(
+      idxExtractedProse,
+      'extracted-prose',
+      resolvedDenseVectorMode
+    );
+    if (!idxExtractedProse.fileRelations) {
+      idxExtractedProse.fileRelations = loadFileRelations(rootDir, userConfig, 'extracted-prose');
+    }
+    if (!idxExtractedProse.repoMap) {
+      idxExtractedProse.repoMap = loadRepoMap(rootDir, userConfig, 'extracted-prose');
+    }
+  }
+
+  const {
+    modelIdForCode,
+    modelIdForProse,
+    modelIdForExtractedProse,
+    modelIdForRecords
+  } = resolveModelIds({
+    modelIdDefault,
+    runCode,
+    runProse,
+    runExtractedProse: resolvedRunExtractedProse,
+    runRecords,
+    idxCode,
+    idxProse,
+    idxExtractedProse,
+    idxRecords
+  });
+
+  return {
+    idxProse,
+    idxExtractedProse,
+    idxCode,
+    idxRecords,
+    runExtractedProse: resolvedRunExtractedProse,
+    hnswAnnState,
+    hnswAnnUsed,
+    modelIdForCode,
+    modelIdForProse,
+    modelIdForExtractedProse,
+    modelIdForRecords
+  };
+}
diff --git a/src/retrieval/cli/model-ids.js b/src/retrieval/cli/model-ids.js
new file mode 100644
index 000000000..7178e9570
--- /dev/null
+++ b/src/retrieval/cli/model-ids.js
@@ -0,0 +1,20 @@
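+// Prefer the model id recorded in each index's dense vectors so query
+// embeddings match what was indexed; fall back to the configured default.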
+export const resolveModelIds = ({
+  modelIdDefault,
+  runCode,
+  runProse,
+  runExtractedProse,
+  runRecords,
+  idxCode,
+  idxProse,
+  idxExtractedProse,
+  idxRecords
+}) => {
+  return {
+    modelIdForCode: runCode ? (idxCode?.denseVec?.model || modelIdDefault) : null,
+    modelIdForProse: runProse ? (idxProse?.denseVec?.model || modelIdDefault) : null,
+    modelIdForExtractedProse: runExtractedProse
+      ? (idxExtractedProse?.denseVec?.model || modelIdDefault)
+      : null,
+    modelIdForRecords: runRecords ? (idxRecords?.denseVec?.model || modelIdDefault) : null
+  };
+};
diff --git a/src/retrieval/cli/normalize-options.js b/src/retrieval/cli/normalize-options.js
new file mode 100644
index 000000000..affc4bdb9
--- /dev/null
+++ b/src/retrieval/cli/normalize-options.js
@@ -0,0 +1,247 @@
+import { getVectorExtensionConfig } from '../../../tools/vector-extension.js';
+import { normalizeHnswConfig } from '../../shared/hnsw.js';
+import { normalizeEmbeddingProvider, normalizeOnnxConfig } from '../../shared/onnx-embeddings.js';
+import { normalizePostingsConfig } from '../../shared/postings-config.js';
+import { resolveFtsWeights } from '../fts.js';
+import { parseJson } from '../query-cache.js';
+import { parseChurnArg, parseModifiedArgs } from '../query-parse.js';
+import { mergeExtFilters, normalizeExtFilter, normalizeLangFilter, parseMetaFilters } from '../filters.js';
+import { resolveSearchMode } from '../cli-args.js';
+import { getMissingFlagMessages, resolveBm25Defaults } from './options.js';
+
+const normalizeOptionalNumber = (value) => (
+  Number.isFinite(Number(value)) ? Number(value) : null
+);
+
+const normalizeOptionalPositive = (value, fallback) => {
+  const parsed = normalizeOptionalNumber(value);
+  if (!Number.isFinite(parsed)) return fallback;
+  return Math.max(0, parsed);
+};
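+
+// Examples: normalizeOptionalPositive('-2', 5) -> 0 (clamped at zero) and
+// normalizeOptionalPositive('abc', 5) -> 5 (fallback on non-numeric input).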
+export function normalizeSearchOptions({
+  argv,
+  rawArgs,
+  rootDir,
+  userConfig,
+  envConfig,
+  metricsDir
+}) {
+  const jsonCompact = argv['json-compact'] === true;
+  const jsonOutput = argv.json || jsonCompact;
+  const missingValueMessages = getMissingFlagMessages(argv, rawArgs);
+  const query = argv._.join(' ').trim();
+
+  const embeddingsConfig = userConfig.indexing?.embeddings || {};
+  const embeddingProvider = normalizeEmbeddingProvider(embeddingsConfig.provider);
+  const embeddingOnnx = normalizeOnnxConfig(embeddingsConfig.onnx || {});
+  const hnswConfig = normalizeHnswConfig(embeddingsConfig.hnsw || {});
+
+  const sqliteConfig = userConfig.sqlite || {};
+  const sqliteAutoChunkThresholdRaw = userConfig.search?.sqliteAutoChunkThreshold;
+  const sqliteAutoChunkThreshold = normalizeOptionalPositive(sqliteAutoChunkThresholdRaw, 0);
+  const sqliteAutoArtifactBytesRaw = userConfig.search?.sqliteAutoArtifactBytes;
+  const sqliteAutoArtifactBytes = normalizeOptionalPositive(sqliteAutoArtifactBytesRaw, 0);
+
+  const postingsConfig = normalizePostingsConfig(userConfig.indexing?.postings || {});
+  const filePrefilterConfig = userConfig.search?.filePrefilter || {};
+  const filePrefilterEnabled = filePrefilterConfig.enabled !== false;
+  const searchRegexConfig = userConfig.search?.regex || null;
+  const fileChargramN = Number.isFinite(Number(filePrefilterConfig.chargramN))
+    ? Math.max(2, Math.floor(Number(filePrefilterConfig.chargramN)))
+    : postingsConfig.chargramMinN;
+
+  const vectorExtension = getVectorExtensionConfig(rootDir, userConfig);
+
+  const contextLines = Math.max(0, parseInt(argv.context, 10) || 0);
+  const searchType = argv.type || null;
+  const searchAuthor = argv.author || null;
+  const searchImport = argv.import || null;
+  const chunkAuthorFilter = argv['chunk-author'] || null;
+
+  const searchModeInfo = resolveSearchMode(argv.mode);
+  const {
+    searchMode,
+    runCode,
+    runProse,
+    runRecords,
+    runExtractedProse: runExtractedProseRaw
+  } = searchModeInfo;
+  const runExtractedProse = runExtractedProseRaw;
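+
+  // BM25 precedence: CLI flag, then user config, then metrics-derived
+  // defaults, then the 1.2 / 0.75 constants.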
+  const bm25Config = userConfig.search?.bm25 || {};
+  const bm25K1Arg = normalizeOptionalNumber(argv['bm25-k1']);
+  const bm25BArg = normalizeOptionalNumber(argv['bm25-b']);
+  const bm25Defaults = resolveBm25Defaults(metricsDir, { runCode, runProse, runExtractedProse });
+  const bm25K1 = bm25K1Arg
+    ?? normalizeOptionalNumber(bm25Config.k1)
+    ?? (bm25Defaults ? bm25Defaults.k1 : null)
+    ?? 1.2;
+  const bm25B = bm25BArg
+    ?? normalizeOptionalNumber(bm25Config.b)
+    ?? (bm25Defaults ? bm25Defaults.b : null)
+    ?? 0.75;
+
+  const branchesMin = normalizeOptionalNumber(argv.branches);
+  const loopsMin = normalizeOptionalNumber(argv.loops);
+  const breaksMin = normalizeOptionalNumber(argv.breaks);
+  const continuesMin = normalizeOptionalNumber(argv.continues);
+  const churnMin = argv.churn ? parseChurnArg(argv.churn) : null;
+  const modifiedArgs = parseModifiedArgs(argv['modified-after'], argv['modified-since']);
+  const modifiedAfter = modifiedArgs.modifiedAfter;
+  const modifiedSinceDays = modifiedArgs.modifiedSinceDays;
+
+  const fileFilters = [];
+  if (argv.path) fileFilters.push(argv.path);
+  if (argv.file) fileFilters.push(argv.file);
+  const fileFilter = fileFilters.length ? fileFilters.flat() : null;
+  const caseAll = argv.case === true;
+  const caseFile = argv['case-file'] === true || caseAll;
+  const caseTokens = argv['case-tokens'] === true || caseAll;
+  const branchFilter = argv.branch ? String(argv.branch).trim() : null;
+
+  const extFilterRaw = normalizeExtFilter(argv.ext);
+  const langFilter = normalizeLangFilter(argv.lang);
+  const extFilter = mergeExtFilters(extFilterRaw, langFilter);
+  const metaFilters = parseMetaFilters(argv.meta, argv['meta-json']);
+
+  const annFlagPresent = rawArgs.includes('--ann') || rawArgs.includes('--no-ann');
+  const annDefault = userConfig.search?.annDefault !== false;
+  const annEnabled = annFlagPresent ? argv.ann : annDefault;
+
+  const scoreBlendConfig = userConfig.search?.scoreBlend || {};
+  const scoreBlendEnabled = scoreBlendConfig.enabled === true;
+  const scoreBlendSparseWeight = normalizeOptionalNumber(scoreBlendConfig.sparseWeight) ?? 1;
+  const scoreBlendAnnWeight = normalizeOptionalNumber(scoreBlendConfig.annWeight) ?? 1;
+
+  const symbolBoostConfig = userConfig.search?.symbolBoost || {};
+  const symbolBoostEnabled = symbolBoostConfig.enabled !== false;
+  const symbolBoostDefinitionWeight = normalizeOptionalNumber(symbolBoostConfig.definitionWeight) ?? 1.2;
+  const symbolBoostExportWeight = normalizeOptionalNumber(symbolBoostConfig.exportWeight) ?? 1.1;
+
+  const minhashMaxDocs = Number.isFinite(Number(userConfig.search?.minhashMaxDocs))
+    ? Math.max(0, Number(userConfig.search.minhashMaxDocs))
+    : 5000;
+
+  const queryCacheConfig = userConfig.search?.queryCache || {};
+  const queryCacheEnabled = queryCacheConfig.enabled === true;
+  const queryCacheMaxEntries = Number.isFinite(Number(queryCacheConfig.maxEntries))
+    ? Math.max(1, Number(queryCacheConfig.maxEntries))
+    : 200;
+  const queryCacheTtlMs = Number.isFinite(Number(queryCacheConfig.ttlMs))
+    ? Math.max(0, Number(queryCacheConfig.ttlMs))
+    : 0;
+
+  const rrfConfig = userConfig.search?.rrf || {};
+  const rrfEnabled = rrfConfig.enabled !== false;
+  const rrfK = Number.isFinite(Number(rrfConfig.k)) ? Math.max(1, Number(rrfConfig.k)) : 60;
+
+  const contextExpansionConfig = userConfig.search?.contextExpansion || {};
+  const contextExpansionEnabled = contextExpansionConfig.enabled === true;
+  const contextExpansionOptions = {
+    maxPerHit: contextExpansionConfig.maxPerHit,
+    maxTotal: contextExpansionConfig.maxTotal,
+    includeCalls: contextExpansionConfig.includeCalls,
+    includeImports: contextExpansionConfig.includeImports,
+    includeExports: contextExpansionConfig.includeExports,
+    includeUsages: contextExpansionConfig.includeUsages
+  };
+  const contextExpansionRespectFilters = contextExpansionConfig.respectFilters !== false;
+
+  const sqliteFtsNormalize = userConfig.search?.sqliteFtsNormalize === true;
+  const sqliteFtsProfile = (argv['fts-profile']
+    || envConfig.ftsProfile
+    || userConfig.search?.sqliteFtsProfile
+    || 'balanced').toLowerCase();
+  let sqliteFtsWeightsConfig = userConfig.search?.sqliteFtsWeights || null;
+  if (argv['fts-weights']) {
+    const parsed = parseJson(argv['fts-weights'], null);
+    if (parsed) {
+      sqliteFtsWeightsConfig = parsed;
+    } else {
+      const values = String(argv['fts-weights'])
+        .split(/[,\s]+/)
+        .filter(Boolean)
+        .map((val) => Number(val))
+        .filter((val) => Number.isFinite(val));
+      sqliteFtsWeightsConfig = values.length ? values : sqliteFtsWeightsConfig;
+    }
+  }
+  const sqliteFtsWeights = resolveFtsWeights(sqliteFtsProfile, sqliteFtsWeightsConfig);
+
+  const explain = argv.explain === true || argv.why === true;
+  const denseVectorMode = typeof userConfig.search?.denseVectorMode === 'string'
+    ? userConfig.search.denseVectorMode.toLowerCase()
+    : 'merged';
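+
+  // Lowercased so policy.js can treat '' and 'auto' as auto-selection.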
+  const backendArg = typeof argv.backend === 'string' ? argv.backend.toLowerCase() : '';
+
+  return {
+    jsonCompact,
+    jsonOutput,
+    missingValueMessages,
+    query,
+    contextLines,
+    searchType,
+    searchAuthor,
+    searchImport,
+    chunkAuthorFilter,
+    searchMode,
+    runCode,
+    runProse,
+    runRecords,
+    runExtractedProse,
+    embeddingsConfig,
+    embeddingProvider,
+    embeddingOnnx,
+    hnswConfig,
+    sqliteConfig,
+    sqliteAutoChunkThreshold,
+    sqliteAutoArtifactBytes,
+    postingsConfig,
+    filePrefilterConfig,
+    filePrefilterEnabled,
+    searchRegexConfig,
+    fileChargramN,
+    vectorExtension,
+    bm25Config,
+    bm25K1,
+    bm25B,
+    branchesMin,
+    loopsMin,
+    breaksMin,
+    continuesMin,
+    churnMin,
+    modifiedAfter,
+    modifiedSinceDays,
+    fileFilter,
+    caseFile,
+    caseTokens,
+    branchFilter,
+    extFilter,
+    metaFilters,
+    annEnabled,
+    scoreBlendEnabled,
+    scoreBlendSparseWeight,
+    scoreBlendAnnWeight,
+    symbolBoostEnabled,
+    symbolBoostDefinitionWeight,
+    symbolBoostExportWeight,
+    minhashMaxDocs,
+    queryCacheEnabled,
+    queryCacheMaxEntries,
+    queryCacheTtlMs,
+    rrfEnabled,
+    rrfK,
+    contextExpansionEnabled,
+    contextExpansionOptions,
+    contextExpansionRespectFilters,
+    sqliteFtsNormalize,
+    sqliteFtsProfile,
+    sqliteFtsWeights,
+    fieldWeightsConfig: userConfig.search?.fieldWeights,
+    explain,
+    denseVectorMode,
+    backendArg
+  };
+}
diff --git a/src/retrieval/cli/options.js b/src/retrieval/cli/options.js
new file mode 100644
index 000000000..fffa6e927
--- /dev/null
+++ b/src/retrieval/cli/options.js
@@ -0,0 +1,139 @@
+import fs from 'node:fs';
+import path from 'node:path';
+
+export function getMissingFlagMessages(argv, rawArgs = []) {
+  const args = Array.isArray(rawArgs) ? rawArgs : [];
+  const hasMissingValue = (flag) => {
+    const flagEq = `${flag}=`;
+    for (let i = 0; i < args.length; i += 1) {
+      const arg = String(args[i] || '');
+      if (arg === flag) {
+        const next = args[i + 1];
+        if (next === undefined) return true;
+        const nextValue = String(next);
+        if (!nextValue.trim() || nextValue.startsWith('-')) return true;
+        continue;
+      }
+      if (arg.startsWith(flagEq)) {
+        const value = arg.slice(flagEq.length);
+        if (!String(value).trim()) return true;
+      }
+    }
+    return false;
+  };
+
+  const missingValueFlags = [
+    { key: 'type', flag: '--type', example: '--type Function' },
+    { key: 'author', flag: '--author', example: '--author "Jane Doe"' },
+    { key: 'import', flag: '--import', example: '--import lodash' }
+  ];
+  return missingValueFlags
+    .filter((entry) => {
+      const value = argv?.[entry.key];
+      if (value === true) return true;
+      if (typeof value === 'string' && !value.trim()) return true;
+      if (value === undefined && hasMissingValue(entry.flag)) return true;
+      return false;
+    })
+    .map((entry) => `Missing value for ${entry.flag}. Example: ${entry.example}`);
+}
+
+export function estimateIndexBytes(indexDir) {
+  if (!indexDir || !fs.existsSync(indexDir)) return 0;
+  const targets = [
+    'chunk_meta.json',
+    'chunk_meta.jsonl',
+    'chunk_meta.meta.json',
+    'token_postings.json',
+    'token_postings.meta.json',
+    'phrase_ngrams.json',
+    'chargram_postings.json',
+    'dense_vectors_uint8.json',
+    'filter_index.json'
+  ];
+  const sumFile = (targetPath) => {
+    try {
+      const stat = fs.statSync(targetPath);
+      return stat.size;
+    } catch {
+      return 0;
+    }
+  };
+  let total = 0;
+  for (const name of targets) {
+    total += sumFile(path.join(indexDir, name));
+  }
+  const chunkMetaPartsDir = path.join(indexDir, 'chunk_meta.parts');
+  if (fs.existsSync(chunkMetaPartsDir)) {
+    for (const entry of fs.readdirSync(chunkMetaPartsDir)) {
+      total += sumFile(path.join(chunkMetaPartsDir, entry));
+    }
+  }
+  const tokenPostingsShardsDir = path.join(indexDir, 'token_postings.shards');
+  if (fs.existsSync(tokenPostingsShardsDir)) {
+    for (const entry of fs.readdirSync(tokenPostingsShardsDir)) {
+      total += sumFile(path.join(tokenPostingsShardsDir, entry));
+    }
+  }
+  return total;
+}
+
+export function resolveIndexedFileCount(metricsRoot, modeFlags) {
+  if (!metricsRoot || !fs.existsSync(metricsRoot)) return null;
+  const modes = [];
+  if (modeFlags?.runCode) modes.push('code');
+  if (modeFlags?.runProse) modes.push('prose');
+  if (modeFlags?.runExtractedProse) modes.push('extracted-prose');
+  if (!modes.length) return null;
+  const counts = [];
+  for (const mode of modes) {
+    const metricsPath = path.join(metricsRoot, `index-${mode}.json`);
+    if (!fs.existsSync(metricsPath)) continue;
+    try {
+      const raw = JSON.parse(fs.readFileSync(metricsPath, 'utf8'));
+      const count = Number(raw?.files?.candidates);
+      if (Number.isFinite(count) && count > 0) counts.push(count);
+    } catch {
+      // ignore
+    }
+  }
+  if (!counts.length) return null;
+  return Math.max(...counts);
+}
+
+export function resolveBm25Defaults(metricsRoot, modeFlags) {
+  if (!metricsRoot || !fs.existsSync(metricsRoot)) return null;
+  const targets = [];
+  if (modeFlags?.runCode) targets.push('code');
+  if (modeFlags?.runProse) targets.push('prose');
+  if (modeFlags?.runExtractedProse) targets.push('extracted-prose');
+  if (!targets.length) return null;
+  const values = [];
+  for (const mode of targets) {
+    const metricsPath = path.join(metricsRoot, `index-${mode}.json`);
+    if (!fs.existsSync(metricsPath)) continue;
+    try {
+      const raw = JSON.parse(fs.readFileSync(metricsPath, 'utf8'));
+      const k1 = Number(raw?.bm25?.k1);
+      const b = Number(raw?.bm25?.b);
+      if (Number.isFinite(k1) && Number.isFinite(b)) values.push({ k1, b });
+    } catch {
+      // ignore
+    }
+  }
+  if (!values.length) return null;
+  const k1 = values.reduce((sum, v) => sum + v.k1, 0) / values.length;
+  const b = values.reduce((sum, v) => sum + v.b, 0) / values.length;
+  return { k1, b };
+}
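+
+// resolveBm25Defaults averages k1/b over the modes that have metrics, e.g.
+// code {k1:1.2, b:0.75} and prose {k1:1.4, b:0.65} -> {k1:1.3, b:0.7}
+// (values illustrative); it returns null when no metrics files exist.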
+
+export function loadBranchFromMetrics(metricsDir, mode) {
+  try {
+    const metricsPath = path.join(metricsDir, `index-${mode}.json`);
+    if (!fs.existsSync(metricsPath)) return null;
+    const raw = JSON.parse(fs.readFileSync(metricsPath, 'utf8'));
+    return raw?.git?.branch || null;
+  } catch {
+    return null;
+  }
+}
diff --git a/src/retrieval/cli/persist.js b/src/retrieval/cli/persist.js
new file mode 100644
index 000000000..eb1714cae
--- /dev/null
+++ b/src/retrieval/cli/persist.js
@@ -0,0 +1,58 @@
+import fs from 'node:fs/promises';
+import path from 'node:path';
+
+export async function recordSearchArtifacts({
+  metricsDir,
+  query,
+  queryTokens,
+  proseHits,
+  codeHits,
+  recordHits,
+  elapsedMs,
+  cacheHit
+}) {
+  try {
+    const metricsPath = path.join(metricsDir, 'metrics.json');
+    const historyPath = path.join(metricsDir, 'searchHistory');
+    const noResultPath = path.join(metricsDir, 'noResultQueries');
+    await fs.mkdir(path.dirname(metricsPath), { recursive: true });
+
+    let metrics = {};
+    try {
+      metrics = JSON.parse(await fs.readFile(metricsPath, 'utf8'));
+    } catch {
+      metrics = {};
+    }
+    const inc = (file, key) => {
+      if (!metrics[file]) metrics[file] = { md: 0, code: 0, records: 0, terms: [] };
+      metrics[file][key] = (metrics[file][key] || 0) + 1;
+      queryTokens.forEach((token) => {
+        if (!metrics[file].terms.includes(token)) metrics[file].terms.push(token);
+      });
+    };
+    proseHits.forEach((hit) => inc(hit.file, 'md'));
+    codeHits.forEach((hit) => inc(hit.file, 'code'));
+    recordHits.forEach((hit) => inc(hit.file, 'records'));
+    await fs.writeFile(metricsPath, JSON.stringify(metrics) + '\n');
+
+    await fs.appendFile(
+      historyPath,
+      JSON.stringify({
+        time: new Date().toISOString(),
+        query,
+        mdFiles: proseHits.length,
+        codeFiles: codeHits.length,
+        recordFiles: recordHits.length,
+        ms: elapsedMs,
+        cached: cacheHit
+      }) + '\n'
+    );
+
+    if (proseHits.length === 0 && codeHits.length === 0 && recordHits.length === 0) {
+      await fs.appendFile(
+        noResultPath,
+        JSON.stringify({ time: new Date().toISOString(), query }) + '\n'
+      );
+    }
+  } catch {}
+}
diff --git a/src/retrieval/cli/policy.js b/src/retrieval/cli/policy.js
new file mode 100644
index 000000000..e3f7583e6
--- /dev/null
+++ b/src/retrieval/cli/policy.js
@@ -0,0 +1,102 @@
+import { getIndexDir } from '../../../tools/dict-utils.js';
+import { resolveBackendPolicy } from '../../storage/backend-policy.js';
+import { getSqliteChunkCount } from '../cli-sqlite.js';
+import { estimateIndexBytes } from './options.js';
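+
+// In auto mode ('' or 'auto'), this probes chunk counts and artifact byte
+// sizes (only when the corresponding thresholds are configured) before
+// delegating to resolveBackendPolicy; an explicit --backend skips the probes,
+// and a policy error reports which per-mode indexes are missing.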
!sqliteProseAvailable) missing.push(`prose=${sqliteProsePath}`); + } + const suffix = missing.length + ? missing.join(', ') + : (backendPolicy.backendLabel === 'lmdb' ? 'missing lmdb index' : 'missing sqlite index'); + return { + backendPolicy, + error: { + message: `${backendPolicy.error} (${suffix}).`, + missing + } + }; + } + + if (!needsSqlite && backendPolicy.backendForcedSqlite) { + onWarn?.('SQLite backend requested, but records-only mode selected; using file-backed records index.'); + } + if (!needsSqlite && backendPolicy.backendForcedLmdb) { + onWarn?.('LMDB backend requested, but records-only mode selected; using file-backed records index.'); + } + if (backendPolicy.backendDisabled) { + onWarn?.(`Unknown backend "${backendArg}". Falling back to memory.`); + } + + let useSqlite = backendPolicy.useSqlite; + let useLmdb = backendPolicy.useLmdb; + if (useLmdb) { + useSqlite = false; + } + + return { + backendPolicy, + useSqlite, + useLmdb, + sqliteFtsRequested: backendPolicy.sqliteFtsRequested, + backendForcedSqlite: backendPolicy.backendForcedSqlite, + backendForcedLmdb: backendPolicy.backendForcedLmdb + }; +}; diff --git a/src/retrieval/cli/query-plan.js b/src/retrieval/cli/query-plan.js new file mode 100644 index 000000000..21279e90b --- /dev/null +++ b/src/retrieval/cli/query-plan.js @@ -0,0 +1,195 @@ +import { hasActiveFilters } from '../filters.js'; +import { buildHighlightRegex } from './highlight.js'; +import { + buildPhraseNgrams, + parseQueryInput, + tokenizePhrase, + tokenizeQueryTerms +} from '../query-parse.js'; +import { + classifyQuery, + resolveIntentFieldWeights, + resolveIntentVectorMode +} from '../query-intent.js'; + +export function buildQueryPlan({ + query, + argv, + dict, + dictConfig, + postingsConfig, + caseTokens, + fileFilter, + caseFile, + searchRegexConfig, + filePrefilterEnabled, + fileChargramN, + searchType, + searchAuthor, + searchImport, + chunkAuthorFilter, + branchesMin, + loopsMin, + breaksMin, + continuesMin, + churnMin, + extFilter, + metaFilters, + modifiedAfter, + modifiedSinceDays, + fieldWeightsConfig, + denseVectorMode, + branchFilter +}) { + const parsedQuery = parseQueryInput(query); + const includeTokens = tokenizeQueryTerms(parsedQuery.includeTerms, dict, { ...dictConfig, caseSensitive: caseTokens }); + const phraseTokens = parsedQuery.phrases + .map((phrase) => tokenizePhrase(phrase, dict, { ...dictConfig, caseSensitive: caseTokens })) + .filter((tokens) => tokens.length); + const phraseInfo = buildPhraseNgrams(phraseTokens, postingsConfig); + const phraseNgrams = phraseInfo.ngrams; + const phraseNgramSet = phraseNgrams.length ? new Set(phraseNgrams) : null; + const phraseRange = { min: phraseInfo.minLen, max: phraseInfo.maxLen }; + const excludeTokens = tokenizeQueryTerms(parsedQuery.excludeTerms, dict, { ...dictConfig, caseSensitive: caseTokens }); + const excludePhraseTokens = parsedQuery.excludePhrases + .map((phrase) => tokenizePhrase(phrase, dict, { ...dictConfig, caseSensitive: caseTokens })) + .filter((tokens) => tokens.length); + const excludePhraseInfo = buildPhraseNgrams(excludePhraseTokens, postingsConfig); + const excludePhraseNgrams = excludePhraseInfo.ngrams; + const excludePhraseRange = excludePhraseInfo.minLen && excludePhraseInfo.maxLen + ? 
{ min: excludePhraseInfo.minLen, max: excludePhraseInfo.maxLen } + : null; + const queryTokens = [...includeTokens, ...phraseTokens.flat()]; + const rx = buildHighlightRegex(queryTokens); + const embeddingQueryText = [...parsedQuery.includeTerms, ...parsedQuery.phrases] + .join(' ') + .trim() || query; + const intentInfo = classifyQuery({ + query, + tokens: queryTokens, + phrases: parsedQuery.phrases, + filters: { file: fileFilter } + }); + const fieldWeights = resolveIntentFieldWeights(fieldWeightsConfig, intentInfo); + const resolvedDenseVectorMode = resolveIntentVectorMode(denseVectorMode, intentInfo); + + const filters = { + type: searchType, + author: searchAuthor, + importName: searchImport, + lint: argv.lint, + churn: churnMin, + calls: argv.calls, + uses: argv.uses, + signature: argv.signature, + param: argv.param, + decorator: argv.decorator, + inferredType: argv['inferred-type'], + returnType: argv['return-type'], + throws: argv.throws, + reads: argv.reads, + writes: argv.writes, + mutates: argv.mutates, + alias: argv.alias, + risk: argv.risk, + riskTag: argv['risk-tag'], + riskSource: argv['risk-source'], + riskSink: argv['risk-sink'], + riskCategory: argv['risk-category'], + riskFlow: argv['risk-flow'], + structPack: argv['struct-pack'], + structRule: argv['struct-rule'], + structTag: argv['struct-tag'], + awaits: argv.awaits, + branches: branchesMin, + loops: loopsMin, + breaks: breaksMin, + continues: continuesMin, + visibility: argv.visibility, + extends: argv.extends, + async: argv.async, + generator: argv.generator, + returns: argv.returns, + file: fileFilter, + caseFile, + caseTokens, + regexConfig: fileFilter ? searchRegexConfig : null, + filePrefilter: { + enabled: filePrefilterEnabled, + chargramN: fileChargramN + }, + ext: extFilter, + meta: metaFilters, + chunkAuthor: chunkAuthorFilter, + modifiedAfter, + excludeTokens, + excludePhrases: excludePhraseNgrams, + excludePhraseRange + }; + const filtersActive = hasActiveFilters(filters); + + const cacheFilters = { + type: searchType, + author: searchAuthor, + calls: argv.calls || null, + uses: argv.uses || null, + signature: argv.signature || null, + param: argv.param || null, + import: searchImport, + lint: argv.lint || false, + churn: churnMin, + decorator: argv.decorator || null, + inferredType: argv['inferred-type'] || null, + returnType: argv['return-type'] || null, + throws: argv.throws || null, + reads: argv.reads || null, + writes: argv.writes || null, + mutates: argv.mutates || null, + risk: argv.risk || null, + riskTag: argv['risk-tag'] || null, + riskSource: argv['risk-source'] || null, + riskSink: argv['risk-sink'] || null, + riskCategory: argv['risk-category'] || null, + riskFlow: argv['risk-flow'] || null, + structPack: argv['struct-pack'] || null, + structRule: argv['struct-rule'] || null, + structTag: argv['struct-tag'] || null, + awaits: argv.awaits || null, + visibility: argv.visibility || null, + extends: argv.extends || null, + async: argv.async || false, + generator: argv.generator || false, + returns: argv.returns || false, + file: fileFilter || null, + ext: extFilter || null, + branch: branchFilter || null, + caseFile, + caseTokens, + regexConfig: fileFilter ? 
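+    // searchRegexConfig only applies when a file filter is present.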
searchRegexConfig : null, + meta: metaFilters, + chunkAuthor: chunkAuthorFilter || null, + modifiedAfter, + modifiedSinceDays + }; + + return { + parsedQuery, + includeTokens, + phraseTokens, + phraseNgrams, + phraseNgramSet, + phraseRange, + excludeTokens, + excludePhraseNgrams, + excludePhraseRange, + queryTokens, + highlightRegex: rx, + embeddingQueryText, + intentInfo, + fieldWeights, + resolvedDenseVectorMode, + filters, + filtersActive, + cacheFilters + }; +} diff --git a/src/retrieval/cli/render-output.js b/src/retrieval/cli/render-output.js new file mode 100644 index 000000000..2dd5c72a2 --- /dev/null +++ b/src/retrieval/cli/render-output.js @@ -0,0 +1,31 @@ +export function compactHit(hit, includeExplain = false) { + if (!hit || typeof hit !== 'object') return hit; + const compact = {}; + const fields = [ + 'id', + 'file', + 'start', + 'end', + 'startLine', + 'endLine', + 'ext', + 'kind', + 'name', + 'headline', + 'score', + 'scoreType', + 'sparseScore', + 'sparseType', + 'annScore', + 'annSource', + 'annType', + 'context' + ]; + for (const field of fields) { + if (hit[field] !== undefined) compact[field] = hit[field]; + } + if (includeExplain && hit.scoreBreakdown !== undefined) { + compact.scoreBreakdown = hit.scoreBreakdown; + } + return compact; +} diff --git a/src/retrieval/cli/render.js b/src/retrieval/cli/render.js new file mode 100644 index 000000000..225a9c323 --- /dev/null +++ b/src/retrieval/cli/render.js @@ -0,0 +1,320 @@ +import { compactHit } from './render-output.js'; +import { formatFullChunk, formatShortChunk, getOutputCacheReporter } from '../output.js'; + +export function renderSearchOutput({ + emitOutput, + jsonOutput, + jsonCompact, + explain, + color, + rootDir, + backendLabel, + backendPolicyInfo, + runCode, + runProse, + runExtractedProse, + runRecords, + topN, + queryTokens, + highlightRegex, + contextExpansionEnabled, + expandedHits, + baseHits, + annEnabled, + annActive, + annBackend, + vectorExtension, + vectorAnnEnabled, + vectorAnnState, + vectorAnnUsed, + hnswConfig, + hnswAnnState, + modelIds, + embeddingProvider, + embeddingOnnx, + cacheInfo, + intentInfo, + resolvedDenseVectorMode, + fieldWeights, + contextExpansionStats, + idxProse, + idxCode, + idxRecords, + showStats, + showMatched, + verboseCache, + elapsedMs +}) { + const proseHitsFinal = expandedHits.prose.hits; + const extractedProseHitsFinal = expandedHits.extractedProse.hits; + const codeHitsFinal = expandedHits.code.hits; + const recordHitsFinal = expandedHits.records.hits; + + const memory = process.memoryUsage(); + const payload = { + backend: backendLabel, + prose: jsonCompact ? proseHitsFinal.map((hit) => compactHit(hit, explain)) : proseHitsFinal, + extractedProse: jsonCompact + ? extractedProseHitsFinal.map((hit) => compactHit(hit, explain)) + : extractedProseHitsFinal, + code: jsonCompact ? codeHitsFinal.map((hit) => compactHit(hit, explain)) : codeHitsFinal, + records: jsonCompact ? recordHitsFinal.map((hit) => compactHit(hit, explain)) : recordHitsFinal, + stats: { + elapsedMs, + annEnabled, + annActive, + annMode: vectorExtension.annMode, + annBackend, + backendPolicy: backendPolicyInfo, + annExtension: vectorAnnEnabled ? { + provider: vectorExtension.provider, + table: vectorExtension.table, + available: { + code: vectorAnnState.code.available, + prose: vectorAnnState.prose.available, + records: vectorAnnState.records.available + } + } : null, + annHnsw: hnswConfig.enabled ? 
{ + available: { + code: hnswAnnState.code.available, + prose: hnswAnnState.prose.available, + records: hnswAnnState.records.available, + extractedProse: hnswAnnState['extracted-prose'].available + }, + space: hnswConfig.space, + efSearch: hnswConfig.efSearch + } : null, + models: { + code: modelIds.code, + prose: modelIds.prose, + extractedProse: modelIds.extractedProse, + records: modelIds.records + }, + embeddings: { + provider: embeddingProvider, + onnxModel: embeddingOnnx.modelPath || null, + onnxTokenizer: embeddingOnnx.tokenizerId || null + }, + cache: { + enabled: cacheInfo.enabled, + hit: cacheInfo.hit, + key: cacheInfo.key + }, + memory: { + rss: memory.rss, + heapTotal: memory.heapTotal, + heapUsed: memory.heapUsed, + external: memory.external, + arrayBuffers: memory.arrayBuffers + } + } + }; + + if (explain) { + payload.stats.intent = { + ...intentInfo, + denseVectorMode: resolvedDenseVectorMode, + fieldWeights + }; + payload.stats.contextExpansion = contextExpansionStats; + } + + if (emitOutput && jsonOutput) { + console.log(JSON.stringify(payload, null, 2)); + } + + if (emitOutput && !jsonOutput) { + let showProse = runProse ? topN : 0; + let showExtractedProse = runExtractedProse ? topN : 0; + let showCode = runCode ? topN : 0; + let showRecords = runRecords ? topN : 0; + + if (runProse && runCode) { + if (baseHits.proseHits.length < topN) { + showCode += showProse; + } + if (baseHits.codeHits.length < topN) { + showProse += showCode; + } + } + if (contextExpansionEnabled) { + showProse += expandedHits.prose.contextHits.length; + showExtractedProse += expandedHits.extractedProse.contextHits.length; + showCode += expandedHits.code.contextHits.length; + showRecords += expandedHits.records.contextHits.length; + } + + if (runProse) { + console.log(color.bold(`\n===== Markdown Results (${backendLabel}) =====`)); + const summaryState = { lastCount: 0 }; + proseHitsFinal.slice(0, showProse).forEach((hit, index) => { + if (index < 2) { + process.stdout.write(formatFullChunk({ + chunk: hit, + index, + mode: 'prose', + score: hit.score, + scoreType: hit.scoreType, + explain, + color, + queryTokens, + rx: highlightRegex, + matched: showMatched, + rootDir, + summaryState + })); + } else { + process.stdout.write(formatShortChunk({ + chunk: hit, + index, + mode: 'prose', + score: hit.score, + scoreType: hit.scoreType, + explain, + color, + queryTokens, + rx: highlightRegex, + matched: showMatched + })); + } + }); + console.log('\n'); + } + + if (runExtractedProse) { + console.log(color.bold(`===== Extracted Prose Results (${backendLabel}) =====`)); + const summaryState = { lastCount: 0 }; + extractedProseHitsFinal.slice(0, showExtractedProse).forEach((hit, index) => { + if (index < 2) { + process.stdout.write(formatFullChunk({ + chunk: hit, + index, + mode: 'extracted-prose', + score: hit.score, + scoreType: hit.scoreType, + explain, + color, + queryTokens, + rx: highlightRegex, + matched: showMatched, + rootDir, + summaryState + })); + } else { + process.stdout.write(formatShortChunk({ + chunk: hit, + index, + mode: 'extracted-prose', + score: hit.score, + scoreType: hit.scoreType, + explain, + color, + queryTokens, + rx: highlightRegex, + matched: showMatched + })); + } + }); + console.log('\n'); + } + + if (runCode) { + console.log(color.bold(`===== Code Results (${backendLabel}) =====`)); + const summaryState = { lastCount: 0 }; + codeHitsFinal.slice(0, showCode).forEach((hit, index) => { + if (index < 1) { + process.stdout.write(formatFullChunk({ + chunk: hit, + index, + mode: 
'code', + score: hit.score, + scoreType: hit.scoreType, + explain, + color, + queryTokens, + rx: highlightRegex, + matched: showMatched, + rootDir, + summaryState + })); + } else { + process.stdout.write(formatShortChunk({ + chunk: hit, + index, + mode: 'code', + score: hit.score, + scoreType: hit.scoreType, + explain, + color, + queryTokens, + rx: highlightRegex, + matched: showMatched + })); + } + }); + console.log('\n'); + } + + if (runRecords) { + console.log(color.bold(`===== Records Results (${backendLabel}) =====`)); + recordHitsFinal.slice(0, showRecords).forEach((hit, index) => { + if (index < 2) { + process.stdout.write(formatFullChunk({ + chunk: hit, + index, + mode: 'records', + score: hit.score, + scoreType: hit.scoreType, + explain, + color, + queryTokens, + rx: highlightRegex, + matched: showMatched, + rootDir: null, + summaryState: null + })); + } else { + process.stdout.write(formatShortChunk({ + chunk: hit, + index, + mode: 'records', + score: hit.score, + scoreType: hit.scoreType, + explain, + color, + queryTokens, + rx: highlightRegex, + matched: showMatched + })); + } + }); + console.log('\n'); + } + + if (showStats) { + const proseCount = idxProse?.chunkMeta?.length ?? 0; + const codeCount = idxCode?.chunkMeta?.length ?? 0; + const recordsCount = idxRecords?.chunkMeta?.length ?? 0; + const cacheTag = cacheInfo.enabled ? (cacheInfo.hit ? 'cache=hit' : 'cache=miss') : 'cache=off'; + const statsParts = [ + `prose chunks=${proseCount}`, + `code chunks=${codeCount}`, + runRecords ? `records chunks=${recordsCount}` : null, + `(${cacheTag})` + ].filter(Boolean); + if (explain && backendPolicyInfo?.reason) { + statsParts.push(`backend=${backendLabel}`); + statsParts.push(`policy=${backendPolicyInfo.reason}`); + } + console.log(color.gray(`Stats: ${statsParts.join(', ')}`)); + } + } + + const outputCacheReporter = getOutputCacheReporter(); + if (emitOutput && verboseCache && outputCacheReporter) { + outputCacheReporter.report(); + } + + return payload; +} diff --git a/src/retrieval/cli/run-search-session.js b/src/retrieval/cli/run-search-session.js new file mode 100644 index 000000000..abf81106b --- /dev/null +++ b/src/retrieval/cli/run-search-session.js @@ -0,0 +1,342 @@ +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { incCacheEvent } from '../../shared/metrics.js'; +import { createSearchPipeline } from '../pipeline.js'; +import { buildQueryCacheKey, getIndexSignature } from '../cli-index.js'; +import { getQueryEmbedding } from '../embedding.js'; +import { expandContext } from '../context-expansion.js'; +import { loadQueryCache, pruneQueryCache } from '../query-cache.js'; +import { filterChunks } from '../output.js'; +import { runSearchByMode } from './search-runner.js'; + +export async function runSearchSession({ + rootDir, + userConfig, + metricsDir, + query, + searchMode, + runCode, + runProse, + runExtractedProse, + runRecords, + topN, + useSqlite, + annEnabled, + annActive, + vectorExtension, + vectorAnnEnabled, + vectorAnnState, + vectorAnnUsed, + hnswConfig, + hnswAnnState, + hnswAnnUsed, + sqliteFtsRequested, + sqliteFtsNormalize, + sqliteFtsProfile, + sqliteFtsWeights, + sqliteCodePath, + sqliteProsePath, + bm25K1, + bm25B, + fieldWeights, + postingsConfig, + queryTokens, + phraseNgramSet, + phraseRange, + symbolBoost, + filters, + filtersActive, + scoreBlend, + rrf, + minhashMaxDocs, + buildCandidateSetSqlite, + getTokenIndexForQuery, + rankSqliteFts, + rankVectorAnnSqlite, + idxProse, + idxExtractedProse, + idxCode, + idxRecords, + 
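+  // Embedding/model configuration below is used for ANN scoring and cache keying.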
modelConfig,
+  modelIds,
+  embeddingProvider,
+  embeddingOnnx,
+  embeddingQueryText,
+  useStubEmbeddings,
+  contextExpansionEnabled,
+  contextExpansionOptions,
+  contextExpansionRespectFilters,
+  cacheFilters,
+  queryCacheEnabled,
+  queryCacheMaxEntries,
+  queryCacheTtlMs,
+  backendLabel,
+  resolvedDenseVectorMode,
+  intentInfo
+}) {
+  const searchPipeline = createSearchPipeline({
+    useSqlite,
+    sqliteFtsRequested,
+    sqliteFtsNormalize,
+    sqliteFtsProfile,
+    sqliteFtsWeights,
+    bm25K1,
+    bm25B,
+    fieldWeights,
+    postingsConfig,
+    queryTokens,
+    phraseNgramSet,
+    phraseRange,
+    symbolBoost,
+    filters,
+    filtersActive,
+    topN,
+    annEnabled: annActive,
+    scoreBlend,
+    rrf,
+    minhashMaxDocs,
+    vectorAnnState,
+    vectorAnnUsed,
+    hnswAnnState,
+    hnswAnnUsed,
+    buildCandidateSetSqlite,
+    getTokenIndexForQuery,
+    rankSqliteFts,
+    rankVectorAnnSqlite
+  });
+
+  let cacheHit = false;
+  let cacheKey = null;
+  let cacheSignature = null;
+  let cacheData = null;
+  let cachedPayload = null;
+
+  const queryCachePath = path.join(metricsDir, 'queryCache.json');
+  if (queryCacheEnabled) {
+    const signature = getIndexSignature({
+      useSqlite,
+      backendLabel,
+      sqliteCodePath,
+      sqliteProsePath,
+      runRecords,
+      runExtractedProse,
+      root: rootDir,
+      userConfig
+    });
+    cacheSignature = JSON.stringify(signature);
+    const cacheKeyInfo = buildQueryCacheKey({
+      query,
+      backend: backendLabel,
+      mode: searchMode,
+      topN,
+      ann: annActive,
+      annMode: vectorExtension.annMode,
+      annProvider: vectorExtension.provider,
+      annExtension: vectorAnnEnabled,
+      scoreBlend,
+      fieldWeights,
+      denseVectorMode: resolvedDenseVectorMode,
+      intent: intentInfo?.type || null,
+      minhashMaxDocs,
+      sqliteFtsNormalize,
+      sqliteFtsProfile,
+      sqliteFtsWeights,
+      models: modelIds,
+      embeddings: {
+        provider: embeddingProvider,
+        onnxModel: embeddingOnnx.modelPath || null,
+        onnxTokenizer: embeddingOnnx.tokenizerId || null
+      },
+      contextExpansion: {
+        enabled: contextExpansionEnabled,
+        maxPerHit: contextExpansionOptions.maxPerHit || null,
+        maxTotal: contextExpansionOptions.maxTotal || null,
+        includeCalls: contextExpansionOptions.includeCalls !== false,
+        includeImports: contextExpansionOptions.includeImports !== false,
+        includeExports: contextExpansionOptions.includeExports === true,
+        includeUsages: contextExpansionOptions.includeUsages === true,
+        respectFilters: contextExpansionRespectFilters
+      },
+      filters: cacheFilters
+    });
+    cacheKey = cacheKeyInfo.key;
+    cacheData = loadQueryCache(queryCachePath);
+    const entry = cacheData.entries.find((e) => e.key === cacheKey && e.signature === cacheSignature);
+    if (entry) {
+      const ttl = Number.isFinite(Number(entry.ttlMs)) ? Number(entry.ttlMs) : queryCacheTtlMs;
+      if (!ttl || (Date.now() - entry.ts) <= ttl) {
+        cachedPayload = entry.payload || null;
+        if (cachedPayload) {
+          const hasCode = !runCode || Array.isArray(cachedPayload.code);
+          const hasProse = !runProse || Array.isArray(cachedPayload.prose);
+          const hasExtractedProse = !runExtractedProse || Array.isArray(cachedPayload.extractedProse);
+          const hasRecords = !runRecords || Array.isArray(cachedPayload.records);
+          if (hasCode && hasProse && hasExtractedProse && hasRecords) {
+            cacheHit = true;
+            entry.ts = Date.now();
+          }
+        }
+      }
+    }
+  }
+  if (queryCacheEnabled) {
+    incCacheEvent({ cache: 'query', result: cacheHit ?
'hit' : 'miss' }); + } + + const needsEmbedding = !cacheHit && annActive && ( + (runProse && (idxProse.denseVec?.vectors?.length || vectorAnnState.prose.available || hnswAnnState.prose.available)) + || (runCode && (idxCode.denseVec?.vectors?.length || vectorAnnState.code.available || hnswAnnState.code.available)) + || (runExtractedProse && idxExtractedProse?.denseVec?.vectors?.length) + || (runRecords && idxRecords.denseVec?.vectors?.length) + ); + const embeddingCache = new Map(); + const getEmbeddingForModel = async (modelId, dims) => { + if (!modelId) return null; + const cacheKeyLocal = useStubEmbeddings ? `${modelId}:${dims || 'default'}` : modelId; + if (embeddingCache.has(cacheKeyLocal)) { + incCacheEvent({ cache: 'embedding', result: 'hit' }); + return embeddingCache.get(cacheKeyLocal); + } + incCacheEvent({ cache: 'embedding', result: 'miss' }); + const embedding = await getQueryEmbedding({ + text: embeddingQueryText, + modelId, + dims, + modelDir: modelConfig.dir, + useStub: useStubEmbeddings, + provider: embeddingProvider, + onnxConfig: embeddingOnnx, + rootDir + }); + embeddingCache.set(cacheKeyLocal, embedding); + return embedding; + }; + const queryEmbeddingCode = needsEmbedding && runCode && ( + idxCode.denseVec?.vectors?.length + || vectorAnnState.code.available + || hnswAnnState.code.available + ) + ? await getEmbeddingForModel(modelIds.code, idxCode.denseVec?.dims || null) + : null; + const queryEmbeddingProse = needsEmbedding && runProse && ( + idxProse.denseVec?.vectors?.length + || vectorAnnState.prose.available + || hnswAnnState.prose.available + ) + ? await getEmbeddingForModel(modelIds.prose, idxProse.denseVec?.dims || null) + : null; + const queryEmbeddingExtractedProse = needsEmbedding && runExtractedProse && idxExtractedProse?.denseVec?.vectors?.length + ? await getEmbeddingForModel(modelIds.extractedProse, idxExtractedProse.denseVec?.dims || null) + : null; + const queryEmbeddingRecords = needsEmbedding && runRecords && idxRecords.denseVec?.vectors?.length + ? await getEmbeddingForModel(modelIds.records, idxRecords.denseVec?.dims || null) + : null; + + const cachedHits = cacheHit && cachedPayload + ? { + proseHits: cachedPayload.prose || [], + extractedProseHits: cachedPayload.extractedProse || [], + codeHits: cachedPayload.code || [], + recordHits: cachedPayload.records || [] + } + : null; + const { proseHits, extractedProseHits, codeHits, recordHits } = cachedHits || runSearchByMode({ + searchPipeline, + runProse, + runExtractedProse, + runCode, + runRecords, + idxProse, + idxExtractedProse, + idxCode, + idxRecords, + queryEmbeddingProse, + queryEmbeddingExtractedProse, + queryEmbeddingCode, + queryEmbeddingRecords + }); + + const contextExpansionStats = { + enabled: contextExpansionEnabled, + code: 0, + prose: 0, + 'extracted-prose': 0, + records: 0 + }; + const expandModeHits = (mode, idx, hits) => { + if (!contextExpansionEnabled || !hits.length || !idx?.chunkMeta?.length) { + return { hits, contextHits: [] }; + } + const allowedIds = contextExpansionRespectFilters && filtersActive + ? new Set( + filterChunks(idx.chunkMeta, filters, idx.filterIndex, idx.fileRelations) + .map((chunk) => chunk.id) + ) + : null; + const contextHits = expandContext({ + hits, + chunkMeta: idx.chunkMeta, + fileRelations: idx.fileRelations, + repoMap: idx.repoMap, + options: contextExpansionOptions, + allowedIds + }); + contextExpansionStats[mode] = contextHits.length; + return { hits: hits.concat(contextHits), contextHits }; + }; + const proseExpanded = runProse ? 
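+    // Expand each active mode in turn; inactive modes pass through unchanged.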
expandModeHits('prose', idxProse, proseHits) : { hits: proseHits, contextHits: [] };
+  const extractedProseExpanded = runExtractedProse
+    ? expandModeHits('extracted-prose', idxExtractedProse, extractedProseHits)
+    : { hits: extractedProseHits, contextHits: [] };
+  const codeExpanded = runCode ? expandModeHits('code', idxCode, codeHits) : { hits: codeHits, contextHits: [] };
+  const recordExpanded = runRecords ? expandModeHits('records', idxRecords, recordHits) : { hits: recordHits, contextHits: [] };
+
+  const hnswActive = Object.values(hnswAnnUsed).some(Boolean);
+  const annBackend = vectorAnnEnabled && (vectorAnnUsed.code || vectorAnnUsed.prose)
+    ? 'sqlite-extension'
+    : (hnswActive ? 'hnsw' : 'js');
+
+  if (queryCacheEnabled && cacheKey) {
+    if (!cacheData) cacheData = { version: 1, entries: [] };
+    if (!cacheHit) {
+      cacheData.entries = cacheData.entries.filter((entry) => entry.key !== cacheKey);
+      cacheData.entries.push({
+        key: cacheKey,
+        ts: Date.now(),
+        ttlMs: queryCacheTtlMs,
+        signature: cacheSignature,
+        meta: {
+          query,
+          backend: backendLabel
+        },
+        payload: {
+          prose: proseHits,
+          extractedProse: extractedProseHits,
+          code: codeHits,
+          records: recordHits
+        }
+      });
+    }
+    pruneQueryCache(cacheData, queryCacheMaxEntries);
+    try {
+      await fs.mkdir(path.dirname(queryCachePath), { recursive: true });
+      await fs.writeFile(queryCachePath, JSON.stringify(cacheData, null, 2));
+    } catch {}
+  }
+
+  return {
+    proseHits,
+    extractedProseHits,
+    codeHits,
+    recordHits,
+    proseExpanded,
+    extractedProseExpanded,
+    codeExpanded,
+    recordExpanded,
+    contextExpansionStats,
+    annBackend,
+    cache: {
+      enabled: queryCacheEnabled,
+      hit: cacheHit,
+      key: cacheKey
+    }
+  };
+}
diff --git a/src/retrieval/cli/search-runner.js b/src/retrieval/cli/search-runner.js
new file mode 100644
index 000000000..12e82b4dc
--- /dev/null
+++ b/src/retrieval/cli/search-runner.js
@@ -0,0 +1,29 @@
+export function runSearchByMode({
+  searchPipeline,
+  runProse,
+  runExtractedProse,
+  runCode,
+  runRecords,
+  idxProse,
+  idxExtractedProse,
+  idxCode,
+  idxRecords,
+  queryEmbeddingProse,
+  queryEmbeddingExtractedProse,
+  queryEmbeddingCode,
+  queryEmbeddingRecords
+}) {
+  const proseHits = runProse
+    ? searchPipeline(idxProse, 'prose', queryEmbeddingProse)
+    : [];
+  const extractedProseHits = runExtractedProse
+    ? searchPipeline(idxExtractedProse, 'extracted-prose', queryEmbeddingExtractedProse)
+    : [];
+  const codeHits = runCode
+    ? searchPipeline(idxCode, 'code', queryEmbeddingCode)
+    : [];
+  const recordHits = runRecords
+    ?
searchPipeline(idxRecords, 'records', queryEmbeddingRecords) + : []; + return { proseHits, extractedProseHits, codeHits, recordHits }; +} diff --git a/src/retrieval/cli/telemetry.js b/src/retrieval/cli/telemetry.js new file mode 100644 index 000000000..7a4ea02f7 --- /dev/null +++ b/src/retrieval/cli/telemetry.js @@ -0,0 +1,29 @@ +import { observeSearchDuration } from '../../shared/metrics.js'; + +export function createSearchTelemetry() { + const metricsStart = process.hrtime.bigint(); + let metricsRecorded = false; + let metricsMode = 'unknown'; + let metricsBackend = 'unknown'; + let metricsAnn = 'unknown'; + const recordSearchMetrics = (status) => { + if (metricsRecorded) return; + metricsRecorded = true; + const elapsed = Number(process.hrtime.bigint() - metricsStart) / 1e9; + try { + observeSearchDuration({ + mode: metricsMode, + backend: metricsBackend, + ann: metricsAnn, + status, + seconds: elapsed + }); + } catch {} + }; + return { + setMode: (mode) => { metricsMode = mode; }, + setBackend: (backend) => { metricsBackend = backend; }, + setAnn: (ann) => { metricsAnn = ann; }, + record: recordSearchMetrics + }; +} diff --git a/src/retrieval/context-expansion.js b/src/retrieval/context-expansion.js new file mode 100644 index 000000000..2af8902e9 --- /dev/null +++ b/src/retrieval/context-expansion.js @@ -0,0 +1,124 @@ +const pushIds = (acc, ids, reason) => { + for (const id of ids) { + if (id == null) continue; + acc.push({ id, reason }); + } +}; + +export function expandContext({ + hits, + chunkMeta, + fileRelations, + repoMap, + options = {}, + allowedIds = null +}) { + if (!Array.isArray(hits) || !hits.length || !Array.isArray(chunkMeta)) { + return []; + } + const maxPerHit = Number.isFinite(Number(options.maxPerHit)) ? Math.max(0, Number(options.maxPerHit)) : 4; + const maxTotal = Number.isFinite(Number(options.maxTotal)) ? Math.max(0, Number(options.maxTotal)) : 40; + const includeCalls = options.includeCalls !== false; + const includeImports = options.includeImports !== false; + const includeExports = options.includeExports === true; + const includeUsages = options.includeUsages === true; + + const byName = new Map(); + const byFile = new Map(); + for (const chunk of chunkMeta) { + if (!chunk) continue; + if (chunk.name) { + const list = byName.get(chunk.name) || []; + list.push(chunk.id); + byName.set(chunk.name, list); + } + if (chunk.file) { + const list = byFile.get(chunk.file) || []; + list.push(chunk.id); + byFile.set(chunk.file, list); + } + } + + const repoMapByName = new Map(); + if (Array.isArray(repoMap)) { + for (const entry of repoMap) { + if (!entry?.name || !entry?.file) continue; + const list = repoMapByName.get(entry.name) || []; + list.push(entry.file); + repoMapByName.set(entry.name, list); + } + } + + const primaryIds = new Set(hits.map((hit) => hit?.id).filter((id) => id != null)); + const addedIds = new Set(); + const contextHits = []; + + for (const hit of hits) { + if (contextHits.length >= maxTotal) break; + const sourceId = hit?.id; + const sourceChunk = sourceId != null ? chunkMeta[sourceId] : null; + if (!sourceChunk) continue; + const candidates = []; + if (includeCalls) { + const calls = sourceChunk.codeRelations?.calls || []; + for (const entry of calls) { + const callee = Array.isArray(entry) ? 
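+        // A calls entry is assumed to be a [caller, callee] tuple; use the callee name.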
entry[1] : null; + if (!callee) continue; + const ids = byName.get(callee) || []; + if (ids.length) { + pushIds(candidates, ids, `call:${callee}`); + } else { + const files = repoMapByName.get(callee) || []; + for (const file of files) { + pushIds(candidates, byFile.get(file) || [], `call:${callee}`); + } + } + } + } + if (fileRelations && sourceChunk.file) { + const relations = typeof fileRelations.get === 'function' + ? fileRelations.get(sourceChunk.file) + : fileRelations[sourceChunk.file]; + if (relations) { + if (includeImports && Array.isArray(relations.importLinks)) { + for (const file of relations.importLinks) { + pushIds(candidates, byFile.get(file) || [], `import:${file}`); + } + } + if (includeUsages && Array.isArray(relations.usages)) { + for (const usage of relations.usages) { + pushIds(candidates, byName.get(usage) || [], `usage:${usage}`); + } + } + if (includeExports && Array.isArray(relations.exports)) { + for (const exp of relations.exports) { + pushIds(candidates, byName.get(exp) || [], `export:${exp}`); + } + } + } + } + + let addedForHit = 0; + for (const candidate of candidates) { + if (contextHits.length >= maxTotal || addedForHit >= maxPerHit) break; + const id = candidate.id; + if (primaryIds.has(id) || addedIds.has(id)) continue; + if (allowedIds && !allowedIds.has(id)) continue; + const chunk = chunkMeta[id]; + if (!chunk) continue; + addedIds.add(id); + addedForHit += 1; + contextHits.push({ + ...chunk, + score: 0, + scoreType: 'context', + context: { + sourceId, + reason: candidate.reason + } + }); + } + } + + return contextHits; +} diff --git a/src/retrieval/embedding.js b/src/retrieval/embedding.js new file mode 100644 index 000000000..45e36c382 --- /dev/null +++ b/src/retrieval/embedding.js @@ -0,0 +1,80 @@ +import fs from 'node:fs'; +import { stubEmbedding } from '../shared/embedding.js'; +import { createOnnxEmbedder, normalizeEmbeddingProvider } from '../shared/onnx-embeddings.js'; + +const embedderCache = new Map(); + +async function getEmbedder({ provider, modelId, modelDir, rootDir, onnxConfig }) { + const resolvedProvider = normalizeEmbeddingProvider(provider); + const cacheKey = JSON.stringify({ + provider: resolvedProvider, + modelId, + modelDir, + onnxConfig: onnxConfig || null, + rootDir + }); + if (embedderCache.has(cacheKey)) return embedderCache.get(cacheKey); + if (resolvedProvider === 'onnx') { + const embedder = createOnnxEmbedder({ + rootDir, + modelId, + modelsDir: modelDir, + onnxConfig + }); + embedderCache.set(cacheKey, embedder); + return embedder; + } + const { pipeline, env } = await import('@xenova/transformers'); + if (modelDir) { + try { + fs.mkdirSync(modelDir, { recursive: true }); + } catch {} + env.cacheDir = modelDir; + } + const embedder = await pipeline('feature-extraction', modelId); + embedderCache.set(cacheKey, embedder); + return embedder; +} + +/** + * Compute a query embedding using the configured model. + * Returns null when embeddings are unavailable. 
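+ *
+ * @example
+ * // Illustrative only; the model id, dims, and cache dir shown here are
+ * // assumptions, not defaults. Call from an async context:
+ * const vec = await getQueryEmbedding({
+ *   text: 'backend policy fallback',
+ *   modelId: 'Xenova/all-MiniLM-L6-v2',
+ *   dims: 384,
+ *   modelDir: '.pairofcleats/models',
+ *   useStub: false
+ * });
+ * // => number[] on success, or null if the provider could not be loaded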
+ * @param {object} options
+ * @param {string} options.text
+ * @param {string} options.modelId
+ * @param {number} options.dims
+ * @param {string} options.modelDir
+ * @param {boolean} options.useStub
+ * @param {string} [options.provider]
+ * @param {object} [options.onnxConfig]
+ * @param {string} [options.rootDir]
+ * @returns {Promise<number[]|null>}
+ */
+export async function getQueryEmbedding({
+  text,
+  modelId,
+  dims,
+  modelDir,
+  useStub,
+  provider,
+  onnxConfig,
+  rootDir
+}) {
+  if (useStub) {
+    return stubEmbedding(text, dims);
+  }
+  try {
+    const resolvedProvider = normalizeEmbeddingProvider(provider);
+    const embedder = await getEmbedder({
+      provider: resolvedProvider,
+      modelId,
+      modelDir,
+      rootDir,
+      onnxConfig
+    });
+    if (resolvedProvider === 'onnx') {
+      return await embedder.getEmbedding(text);
+    }
+    const output = await embedder(text, { pooling: 'mean', normalize: true });
+    return Array.from(output.data);
+  } catch {
+    return null;
+  }
+}
diff --git a/src/retrieval/filter-index.js b/src/retrieval/filter-index.js
new file mode 100644
index 000000000..4e8d5b28e
--- /dev/null
+++ b/src/retrieval/filter-index.js
@@ -0,0 +1,146 @@
+import { tri } from '../shared/tokenize.js';
+import { buildBitmapIndex } from './bitmap.js';
+
+/**
+ * Build lookup maps for common search filters.
+ * @param {Array<object>} chunkMeta
+ * @param {{fileChargramN?:number}} [options]
+ * @returns {object}
+ */
+export function buildFilterIndex(chunkMeta = [], options = {}) {
+  const fileChargramN = Number.isFinite(Number(options.fileChargramN))
+    ? Math.max(2, Math.floor(Number(options.fileChargramN)))
+    : 3;
+  const includeBitmaps = options.includeBitmaps !== false;
+  const index = {
+    byExt: new Map(),
+    byKind: new Map(),
+    byAuthor: new Map(),
+    byChunkAuthor: new Map(),
+    byVisibility: new Map(),
+    fileById: [],
+    fileIdByPath: new Map(),
+    fileChunksById: [],
+    fileChargrams: new Map(),
+    fileChargramN
+  };
+
+  const add = (map, value, id) => {
+    if (!value) return;
+    const values = Array.isArray(value) ? value : [value];
+    for (const entry of values) {
+      const key = String(entry || '').toLowerCase();
+      if (!key) continue;
+      let bucket = map.get(key);
+      if (!bucket) {
+        bucket = new Set();
+        map.set(key, bucket);
+      }
+      bucket.add(id);
+    }
+  };
+
+  const normalizeFilePath = (value) => String(value || '').replace(/\\/g, '/').toLowerCase();
+  const addFileChargrams = (fileId, fileValue) => {
+    const grams = new Set(tri(fileValue, fileChargramN));
+    for (const gram of grams) {
+      let bucket = index.fileChargrams.get(gram);
+      if (!bucket) {
+        bucket = new Set();
+        index.fileChargrams.set(gram, bucket);
+      }
+      bucket.add(fileId);
+    }
+  };
+  const addFile = (fileValue, chunkId) => {
+    if (!fileValue) return;
+    const normalized = normalizeFilePath(fileValue);
+    let fileId = index.fileIdByPath.get(normalized);
+    if (fileId == null) {
+      fileId = index.fileById.length;
+      index.fileIdByPath.set(normalized, fileId);
+      index.fileById.push(normalized);
+      index.fileChunksById[fileId] = new Set();
+      addFileChargrams(fileId, normalized);
+    }
+    index.fileChunksById[fileId].add(chunkId);
+  };
+
+  for (const chunk of chunkMeta) {
+    if (!chunk) continue;
+    const id = chunk.id;
+    if (!Number.isFinite(id)) continue;
+    addFile(chunk.file, id);
+    add(index.byExt, chunk.ext, id);
+    add(index.byKind, chunk.kind, id);
+    add(index.byAuthor, chunk.last_author, id);
+    const visibility = chunk.docmeta?.visibility || chunk.docmeta?.modifiers?.visibility || null;
+    add(index.byVisibility, visibility, id);
+    const chunkAuthors = Array.isArray(chunk.chunk_authors) ?
chunk.chunk_authors : []; + for (const author of chunkAuthors) add(index.byChunkAuthor, author, id); + } + + if (includeBitmaps) { + index.bitmap = buildBitmapIndex(index); + } + return index; +} + +const serializeMap = (map) => { + const out = {}; + if (!map || typeof map.entries !== 'function') return out; + for (const [key, value] of map.entries()) { + out[key] = Array.from(value || []); + } + return out; +}; + +const hydrateMap = (value) => { + const map = new Map(); + if (!value || typeof value !== 'object') return map; + for (const [key, list] of Object.entries(value)) { + map.set(key, new Set(Array.isArray(list) ? list : [])); + } + return map; +}; + +export function serializeFilterIndex(index) { + if (!index) return null; + return { + fileChargramN: index.fileChargramN || 3, + byExt: serializeMap(index.byExt), + byKind: serializeMap(index.byKind), + byAuthor: serializeMap(index.byAuthor), + byChunkAuthor: serializeMap(index.byChunkAuthor), + byVisibility: serializeMap(index.byVisibility), + fileById: Array.isArray(index.fileById) ? index.fileById : [], + fileChunksById: Array.isArray(index.fileChunksById) + ? index.fileChunksById.map((set) => Array.from(set || [])) + : [], + fileChargrams: serializeMap(index.fileChargrams) + }; +} + +export function hydrateFilterIndex(raw) { + if (!raw || typeof raw !== 'object') return null; + const fileById = Array.isArray(raw.fileById) ? raw.fileById : []; + const fileIdByPath = new Map(fileById.map((value, idx) => [value, idx])); + const index = { + fileChargramN: Number.isFinite(Number(raw.fileChargramN)) + ? Math.max(2, Math.floor(Number(raw.fileChargramN))) + : 3, + byExt: hydrateMap(raw.byExt), + byKind: hydrateMap(raw.byKind), + byAuthor: hydrateMap(raw.byAuthor), + byChunkAuthor: hydrateMap(raw.byChunkAuthor), + byVisibility: hydrateMap(raw.byVisibility), + fileById, + fileIdByPath, + fileChunksById: Array.isArray(raw.fileChunksById) + ? raw.fileChunksById.map((list) => new Set(Array.isArray(list) ? 
list : [])) + : [], + fileChargrams: hydrateMap(raw.fileChargrams) + }; + index.bitmap = buildBitmapIndex(index); + return index; +} diff --git a/src/retrieval/filters.js b/src/retrieval/filters.js new file mode 100644 index 000000000..d8807c571 --- /dev/null +++ b/src/retrieval/filters.js @@ -0,0 +1,204 @@ +import { parseJson } from './query-cache.js'; +import { + CLIKE_EXTS, + CSHARP_EXTS, + CSS_EXTS, + GO_EXTS, + HTML_EXTS, + JAVA_EXTS, + JS_EXTS, + KOTLIN_EXTS, + LUA_EXTS, + OBJC_EXTS, + PERL_EXTS, + PHP_EXTS, + RUBY_EXTS, + SHELL_EXTS, + SQL_EXTS, + TS_EXTS +} from '../index/constants.js'; + +const PY_EXTS = new Set(['.py']); +const SWIFT_EXTS = new Set(['.swift']); +const DOC_EXTS = new Set(['.md', '.rst', '.adoc', '.asciidoc']); +const CONFIG_EXTS = new Set(['.json', '.toml', '.ini', '.cfg', '.conf', '.xml', '.yml', '.yaml']); + +const LANG_EXT_MAP = new Map([ + ['javascript', JS_EXTS], + ['js', JS_EXTS], + ['typescript', TS_EXTS], + ['ts', TS_EXTS], + ['python', PY_EXTS], + ['py', PY_EXTS], + ['swift', SWIFT_EXTS], + ['rust', new Set(['.rs'])], + ['go', GO_EXTS], + ['java', JAVA_EXTS], + ['csharp', CSHARP_EXTS], + ['c#', CSHARP_EXTS], + ['kotlin', KOTLIN_EXTS], + ['ruby', RUBY_EXTS], + ['php', PHP_EXTS], + ['lua', LUA_EXTS], + ['sql', SQL_EXTS], + ['perl', PERL_EXTS], + ['shell', SHELL_EXTS], + ['bash', SHELL_EXTS], + ['zsh', SHELL_EXTS], + ['clike', CLIKE_EXTS], + ['c', new Set(['.c', '.h'])], + ['cpp', new Set(['.cc', '.cpp', '.hpp', '.hh'])], + ['c++', new Set(['.cc', '.cpp', '.hpp', '.hh'])], + ['objc', OBJC_EXTS], + ['objective-c', OBJC_EXTS], + ['html', HTML_EXTS], + ['css', CSS_EXTS], + ['json', new Set(['.json'])], + ['yaml', new Set(['.yml', '.yaml'])], + ['toml', new Set(['.toml'])], + ['ini', new Set(['.ini', '.cfg', '.conf'])], + ['xml', new Set(['.xml'])], + ['markdown', new Set(['.md'])], + ['rst', new Set(['.rst'])], + ['asciidoc', new Set(['.adoc', '.asciidoc'])], + ['docs', DOC_EXTS], + ['config', CONFIG_EXTS] +]); + +/** + * Normalize extension filters into a lowercase list. + * @param {string|string[]|null|undefined} extArg + * @returns {string[]|null} + */ +export function normalizeExtFilter(extArg) { + const entries = Array.isArray(extArg) ? extArg : (extArg ? [extArg] : []); + if (!entries.length) return null; + const normalized = []; + for (const entry of entries) { + String(entry || '') + .split(/[,\s]+/) + .map((raw) => raw.trim()) + .filter(Boolean) + .forEach((raw) => { + let value = raw.toLowerCase(); + value = value.replace(/^\*+/, ''); + if (!value) return; + if (!value.startsWith('.')) value = `.${value}`; + normalized.push(value); + }); + } + return normalized.length ? Array.from(new Set(normalized)) : null; +} + +/** + * Normalize language filters into a list of extensions. + * @param {string|string[]|null|undefined} langArg + * @returns {string[]|null} + */ +export function normalizeLangFilter(langArg) { + const entries = Array.isArray(langArg) ? langArg : (langArg ? [langArg] : []); + if (!entries.length) return null; + const exts = new Set(); + for (const entry of entries) { + String(entry || '') + .split(/[,\s]+/) + .map((raw) => raw.trim().toLowerCase()) + .filter(Boolean) + .forEach((raw) => { + const mapped = LANG_EXT_MAP.get(raw); + if (!mapped) return; + for (const ext of mapped) exts.add(ext); + }); + } + return exts.size ? Array.from(exts) : null; +} + +/** + * Merge extension filters with language filters. 
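+ *
+ * @example
+ * // Intersection when both are present; pass-through when one is null:
+ * mergeExtFilters(['.ts', '.js'], ['.js', '.jsx']); // => ['.js']
+ * mergeExtFilters(null, ['.py']); // => ['.py']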
+ * @param {string[]|null} extFilter + * @param {string[]|null} langFilter + * @returns {string[]|null} + */ +export function mergeExtFilters(extFilter, langFilter) { + if (!extFilter && !langFilter) return null; + if (extFilter && langFilter) { + const langSet = new Set(langFilter); + const merged = extFilter.filter((ext) => langSet.has(ext)); + return merged.length ? Array.from(new Set(merged)) : null; + } + return extFilter || langFilter; +} + +/** + * Parse --meta and --meta-json into a normalized filter list. + * @param {string|string[]|null|undefined} metaArg + * @param {string|string[]|null|undefined} metaJsonArg + * @returns {Array<{key:string,value:any}>|null} + */ +export function parseMetaFilters(metaArg, metaJsonArg) { + const filters = []; + const pushFilter = (rawKey, rawValue) => { + const key = String(rawKey || '').trim(); + if (!key) return; + const value = rawValue === undefined ? null : rawValue; + filters.push({ key, value }); + }; + const handleEntry = (entry) => { + const text = String(entry || '').trim(); + if (!text) return; + const split = text.split('='); + const key = split.shift(); + const value = split.length ? split.join('=').trim() : null; + pushFilter(key, value === '' ? null : value); + }; + const metaEntries = Array.isArray(metaArg) ? metaArg : (metaArg ? [metaArg] : []); + for (const entry of metaEntries) handleEntry(entry); + const metaJsonEntries = Array.isArray(metaJsonArg) ? metaJsonArg : (metaJsonArg ? [metaJsonArg] : []); + for (const entry of metaJsonEntries) { + const parsed = parseJson(entry, null); + if (!parsed) continue; + if (Array.isArray(parsed)) { + parsed.forEach((item) => { + if (!item || typeof item !== 'object') return; + Object.entries(item).forEach(([key, value]) => pushFilter(key, value)); + }); + } else if (typeof parsed === 'object') { + Object.entries(parsed).forEach(([key, value]) => pushFilter(key, value)); + } + } + return filters.length ? filters : null; +} + +/** + * Check whether any search filters are active. + * @param {object|null|undefined} filters + * @returns {boolean} + */ +export function hasActiveFilters(filters) { + if (!filters || typeof filters !== 'object') return false; + for (const value of Object.values(filters)) { + if (value == null) continue; + if (typeof value === 'boolean') { + if (value) return true; + continue; + } + if (typeof value === 'number') { + if (Number.isFinite(value)) return true; + continue; + } + if (typeof value === 'string') { + if (value.trim()) return true; + continue; + } + if (Array.isArray(value)) { + if (value.length) return true; + continue; + } + if (typeof value === 'object') { + if (Object.keys(value).length) return true; + continue; + } + return true; + } + return false; +} diff --git a/src/retrieval/fts.js b/src/retrieval/fts.js new file mode 100644 index 000000000..87dcf48bd --- /dev/null +++ b/src/retrieval/fts.js @@ -0,0 +1,88 @@ +/** + * Resolve FTS5 bm25 weights from a profile or config override. 
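+ *
+ * @example
+ * // Object overrides merge into the named profile; the leading 0 is
+ * // assumed to be an unweighted id column:
+ * resolveFtsWeights('balanced', { name: 2.0 });
+ * // => [0, 0.2, 2.0, 1.2, 0.6, 1.5, 1.8, 1.0]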
+ * @param {string} profile + * @param {object|number[]|null} config + * @returns {number[]} + */ +export function resolveFtsWeights(profile, config) { + const profiles = { + balanced: { + file: 0.2, + name: 1.5, + signature: 1.2, + kind: 0.6, + headline: 1.5, + doc: 1.8, + tokens: 1.0 + }, + headline: { + file: 0.1, + name: 1.2, + signature: 1.0, + kind: 0.4, + headline: 3.0, + doc: 2.2, + tokens: 1.0 + }, + name: { + file: 0.2, + name: 2.5, + signature: 1.6, + kind: 0.8, + headline: 1.2, + doc: 1.4, + tokens: 1.0 + } + }; + const base = profiles[profile] || profiles.balanced; + if (Array.isArray(config)) { + const values = config.map((v) => Number(v)).filter((v) => Number.isFinite(v)); + if (values.length >= 8) return values.slice(0, 8); + if (values.length === 7) return [0, ...values]; + if (values.length === 6) { + const [, file, name, kind, headline, tokens] = values; + return [ + 0, + file ?? base.file, + name ?? base.name, + base.signature, + kind ?? base.kind, + headline ?? base.headline, + base.doc, + tokens ?? base.tokens + ]; + } + if (values.length === 5) { + const [file, name, kind, headline, tokens] = values; + return [ + 0, + file ?? base.file, + name ?? base.name, + base.signature, + kind ?? base.kind, + headline ?? base.headline, + base.doc, + tokens ?? base.tokens + ]; + } + } else if (config && typeof config === 'object') { + const merged = { ...base }; + for (const key of ['file', 'name', 'signature', 'kind', 'headline', 'doc', 'tokens']) { + if (Number.isFinite(Number(config[key]))) merged[key] = Number(config[key]); + } + if (Number.isFinite(Number(config.body))) merged.tokens = Number(config.body); + return [0, merged.file, merged.name, merged.signature, merged.kind, merged.headline, merged.doc, merged.tokens]; + } + + return [0, base.file, base.name, base.signature, base.kind, base.headline, base.doc, base.tokens]; +} + +/** + * Build a bm25(chunks_fts, ...) SQL expression from weights. + * @param {number[]} weights + * @returns {string} + */ +export function buildFtsBm25Expr(weights) { + const safe = weights.map((val) => (Number.isFinite(val) ? 
val : 1)); + return `bm25(chunks_fts, ${safe.join(', ')})`; +} diff --git a/src/retrieval/index-cache.js b/src/retrieval/index-cache.js new file mode 100644 index 000000000..7d4880822 --- /dev/null +++ b/src/retrieval/index-cache.js @@ -0,0 +1,106 @@ +import fsSync from 'node:fs'; +import path from 'node:path'; + +const INDEX_FILES = [ + 'phrase_ngrams.json', + 'chargram_postings.json', + 'dense_vectors_uint8.json', + 'dense_vectors_doc_uint8.json', + 'dense_vectors_code_uint8.json', + 'dense_vectors_hnsw.meta.json', + 'dense_vectors_hnsw.bin', + 'field_postings.json', + 'field_tokens.json', + 'minhash_signatures.json', + 'file_relations.json', + 'file_meta.json', + 'filter_index.json', + 'index_state.json' +]; + +const fileSignature = (filePath) => { + try { + let statPath = filePath; + if (!fsSync.existsSync(statPath) && filePath.endsWith('.json')) { + const gzPath = `${filePath}.gz`; + if (fsSync.existsSync(gzPath)) statPath = gzPath; + } + const stat = fsSync.statSync(statPath); + return `${stat.size}:${stat.mtimeMs}`; + } catch { + return null; + } +}; + +const shardSignature = (dir, prefix) => { + try { + if (!fsSync.existsSync(dir)) return null; + const entries = fsSync + .readdirSync(dir) + .filter((name) => name.startsWith(prefix)) + .sort(); + if (!entries.length) return null; + return entries + .map((name) => fileSignature(path.join(dir, name)) || 'missing') + .join(','); + } catch { + return null; + } +}; + +const chunkMetaSignature = (dir) => { + const jsonPath = path.join(dir, 'chunk_meta.json'); + const jsonSig = fileSignature(jsonPath); + if (jsonSig) return `chunk_meta.json:${jsonSig}`; + const jsonlPath = path.join(dir, 'chunk_meta.jsonl'); + const jsonlSig = fileSignature(jsonlPath); + if (jsonlSig) return `chunk_meta.jsonl:${jsonlSig}`; + const metaPath = path.join(dir, 'chunk_meta.meta.json'); + const metaSig = fileSignature(metaPath); + const partsSig = shardSignature(path.join(dir, 'chunk_meta.parts'), 'chunk_meta.part-'); + if (metaSig || partsSig) { + return `chunk_meta.meta.json:${metaSig || 'missing'}|parts:${partsSig || 'missing'}`; + } + return 'chunk_meta.json:missing'; +}; + +const tokenPostingsSignature = (dir) => { + const jsonPath = path.join(dir, 'token_postings.json'); + const jsonSig = fileSignature(jsonPath); + if (jsonSig) return `token_postings.json:${jsonSig}`; + const metaPath = path.join(dir, 'token_postings.meta.json'); + const metaSig = fileSignature(metaPath); + const partsSig = shardSignature(path.join(dir, 'token_postings.shards'), 'token_postings.part-'); + if (metaSig || partsSig) { + return `token_postings.meta.json:${metaSig || 'missing'}|parts:${partsSig || 'missing'}`; + } + return 'token_postings.json:missing'; +}; + +export function buildIndexSignature(dir) { + if (!dir) return null; + const parts = [ + chunkMetaSignature(dir), + tokenPostingsSignature(dir), + ...INDEX_FILES.map((name) => { + const target = path.join(dir, name); + const sig = fileSignature(target); + return `${name}:${sig || 'missing'}`; + }) + ]; + return parts.join('|'); +} + +export function loadIndexWithCache(cache, dir, options, loader) { + if (!cache) return loader(dir, options); + const hnswKey = options?.includeHnsw ? 
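+    // HNSW settings join the cache key so config changes invalidate cached loads.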
JSON.stringify(options?.hnswConfig || {}) : 'no-hnsw'; + const cacheKey = `${dir}::${options?.modelIdDefault || ''}::${options?.fileChargramN || ''}::${hnswKey}`; + const signature = buildIndexSignature(dir); + const cached = cache.get(cacheKey); + if (cached && cached.signature === signature) { + return cached.value; + } + const value = loader(dir, options); + cache.set(cacheKey, { signature, value }); + return value; +} diff --git a/src/retrieval/lancedb.js b/src/retrieval/lancedb.js new file mode 100644 index 000000000..1e587769e --- /dev/null +++ b/src/retrieval/lancedb.js @@ -0,0 +1,172 @@ +import fs from 'node:fs'; +import { tryImport } from '../shared/optional-deps.js'; +import { normalizeLanceDbConfig } from '../shared/lancedb.js'; + +const CANDIDATE_PUSH_LIMIT = 500; + +let cachedModule = null; +let warnedMissing = false; +let warnedQuery = false; + +const warnOnce = (message) => { + if (warnedQuery) return; + warnedQuery = true; + console.warn(message); +}; + +const loadLanceDb = async () => { + if (cachedModule) return cachedModule; + const result = await tryImport('@lancedb/lancedb'); + if (!result.ok) { + if (!warnedMissing) { + warnedMissing = true; + console.warn('[ann] LanceDB unavailable; falling back to other ANN backends.'); + } + return null; + } + cachedModule = result.mod?.default || result.mod; + return cachedModule; +}; + +const connectionCache = new Map(); + +const getConnection = async (dir) => { + if (!dir) return null; + if (connectionCache.has(dir)) return connectionCache.get(dir); + const lancedb = await loadLanceDb(); + const connect = lancedb?.connect || lancedb?.default?.connect; + if (!connect) return null; + const db = await connect(dir); + const entry = { db, tables: new Map() }; + connectionCache.set(dir, entry); + return entry; +}; + +const getTable = async (dir, tableName) => { + const connection = await getConnection(dir); + if (!connection || !tableName) return null; + if (connection.tables.has(tableName)) return connection.tables.get(tableName); + const openTable = connection.db?.openTable; + if (typeof openTable !== 'function') return null; + const table = await openTable.call(connection.db, tableName); + connection.tables.set(tableName, table); + return table; +}; + +const toArray = async (query) => { + if (!query) return []; + if (typeof query.toArray === 'function') return query.toArray(); + if (typeof query.execute === 'function') return query.execute(); + if (typeof query.collect === 'function') return query.collect(); + return []; +}; + +const normalizeSim = (distance, metric) => { + if (!Number.isFinite(distance)) return null; + if (metric === 'l2') return -distance; + if (metric === 'cosine') return 1 - distance; + return distance; +}; + +const readRowId = (row, idColumn) => { + const value = row?.[idColumn] ?? row?.id ?? row?._id ?? row?.idx; + const numeric = Number(value); + if (Number.isFinite(numeric)) return numeric; + return null; +}; + +const readRowScore = (row, metric) => { + const distanceRaw = row?._distance ?? row?.distance; + if (distanceRaw != null) { + return normalizeSim(Number(distanceRaw), metric); + } + const scoreRaw = row?.score ?? row?._score ?? row?.sim ?? row?.similarity; + const score = Number(scoreRaw); + return Number.isFinite(score) ? 
score : null; +}; + +export async function rankLanceDb({ + lancedbInfo, + queryEmbedding, + topN, + candidateSet, + config +}) { + if (!lancedbInfo?.available) return []; + if (!Array.isArray(queryEmbedding) || !queryEmbedding.length) return []; + const resolvedConfig = normalizeLanceDbConfig(config); + if (!resolvedConfig.enabled) return []; + + const meta = lancedbInfo.meta || {}; + const tableName = meta.table || resolvedConfig.table; + const idColumn = meta.idColumn || resolvedConfig.idColumn; + const embeddingColumn = meta.embeddingColumn || resolvedConfig.embeddingColumn; + const metric = meta.metric || resolvedConfig.metric; + const dims = Number.isFinite(Number(meta.dims)) ? Number(meta.dims) : null; + if (dims && queryEmbedding.length !== dims) return []; + + const dir = lancedbInfo.dir; + if (!dir || !fs.existsSync(dir)) return []; + + let table; + try { + table = await getTable(dir, tableName); + } catch (err) { + warnOnce(`[ann] LanceDB table load failed; falling back to other ANN backends. ${err?.message || err}`); + return []; + } + if (!table || typeof table.search !== 'function') return []; + + const limitBase = Math.max(1, Number(topN) || 1); + const candidateCount = candidateSet && candidateSet.size ? candidateSet.size : 0; + const limit = candidateCount + ? Math.min(Math.max(limitBase * 4, limitBase + 10), candidateCount) + : limitBase; + let query; + if (embeddingColumn !== 'vector' && table.search.length > 1) { + query = table.search(queryEmbedding, { vectorColumn: embeddingColumn }); + } else { + query = table.search(queryEmbedding); + } + if (typeof query?.metricType === 'function') { + query = query.metricType(metric); + } else if (typeof query?.metric === 'function') { + query = query.metric(metric); + } else if (typeof query?.distanceType === 'function') { + query = query.distanceType(metric); + } + const canPushdown = candidateCount > 0 + && candidateCount <= CANDIDATE_PUSH_LIMIT + && typeof query?.where === 'function'; + if (canPushdown) { + const ids = Array.from(candidateSet).filter((id) => Number.isFinite(Number(id))); + if (ids.length) { + query = query.where(`${idColumn} IN (${ids.join(',')})`); + } + } + if (typeof query.limit === 'function') query = query.limit(limit); + if (typeof query.select === 'function') query = query.select([idColumn]); + + let rows; + try { + rows = await toArray(query); + } catch (err) { + warnOnce(`[ann] LanceDB query failed; falling back to other ANN backends. ${err?.message || err}`); + return []; + } + + const hits = []; + for (const row of rows || []) { + const idx = readRowId(row, idColumn); + if (idx == null) continue; + const sim = readRowScore(row, metric); + if (sim == null) continue; + hits.push({ idx, sim }); + } + const filtered = !candidateCount || canPushdown + ? hits + : hits.filter((hit) => candidateSet.has(hit.idx)); + return filtered + .sort((a, b) => (b.sim - a.sim) || (a.idx - b.idx)) + .slice(0, limitBase); +} diff --git a/src/retrieval/lmdb-helpers.js b/src/retrieval/lmdb-helpers.js new file mode 100644 index 000000000..b57b77d81 --- /dev/null +++ b/src/retrieval/lmdb-helpers.js @@ -0,0 +1,180 @@ +import { Unpackr } from 'msgpackr'; +import { buildFilterIndex, hydrateFilterIndex } from './filter-index.js'; +import { loadHnswIndex, normalizeHnswConfig, resolveHnswPaths, validateHnswMetaCompatibility } from '../shared/hnsw.js'; +import { LMDB_ARTIFACT_KEYS, LMDB_META_KEYS } from '../storage/lmdb/schema.js'; + +const unpackr = new Unpackr(); +const decode = (value) => (value == null ? 
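+  // Values are msgpackr-packed; nullish values decode to null.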
null : unpackr.unpack(value));
+
+/**
+ * Create LMDB helper functions for search.
+ * @param {object} options
+ * @param {(mode:'code'|'prose')=>object|null} options.getDb
+ * @param {object} options.hnswConfig
+ * @param {string} options.modelIdDefault
+ * @param {number} options.fileChargramN
+ * @param {Record<string, string>} options.indexDirs
+ * @returns {object}
+ */
+export function createLmdbHelpers(options) {
+  const {
+    getDb,
+    hnswConfig: rawHnswConfig,
+    modelIdDefault,
+    fileChargramN,
+    indexDirs
+  } = options;
+  const hnswConfig = normalizeHnswConfig(rawHnswConfig || {});
+
+  const getArtifact = (db, key) => decode(db.get(key));
+
+  /**
+   * Load index artifacts from LMDB into in-memory structures.
+   * @param {'code'|'prose'} mode
+   * @param {object} [options]
+   * @returns {object}
+   */
+  function loadIndexFromLmdb(mode, options = {}) {
+    const db = getDb(mode);
+    if (!db) throw new Error('LMDB backend requested but database is not available.');
+    const includeMinhash = options.includeMinhash !== false;
+    const includeDense = options.includeDense !== false;
+    const includeChunks = options.includeChunks !== false;
+    const includeFilterIndex = options.includeFilterIndex !== false;
+    const includeHnsw = options.includeHnsw !== false;
+
+    const chunkCountRaw = getArtifact(db, LMDB_META_KEYS.chunkCount);
+    const chunkCount = Number.isFinite(Number(chunkCountRaw)) ? Number(chunkCountRaw) : 0;
+    let chunkMeta = includeChunks
+      ? (getArtifact(db, LMDB_ARTIFACT_KEYS.chunkMeta) || [])
+      : (chunkCount ? Array.from({ length: chunkCount }) : []);
+
+    const fileMetaRaw = getArtifact(db, LMDB_ARTIFACT_KEYS.fileMeta);
+    let fileMetaById = null;
+    if (Array.isArray(fileMetaRaw)) {
+      fileMetaById = new Map();
+      for (const entry of fileMetaRaw) {
+        if (!entry || entry.id == null) continue;
+        fileMetaById.set(entry.id, entry);
+      }
+    }
+    if (!fileMetaById && includeChunks) {
+      const missingMeta = chunkMeta.some((chunk) => chunk && chunk.fileId != null && !chunk.file);
+      if (missingMeta) {
+        throw new Error('file_meta.json is required for fileId-based chunk metadata.');
+      }
+    } else if (fileMetaById && includeChunks) {
+      for (const chunk of chunkMeta) {
+        if (!chunk || (chunk.file && chunk.ext)) continue;
+        const meta = fileMetaById.get(chunk.fileId);
+        if (!meta) continue;
+        if (!chunk.file) chunk.file = meta.file;
+        if (!chunk.ext) chunk.ext = meta.ext;
+        if (!chunk.externalDocs) chunk.externalDocs = meta.externalDocs;
+        if (!chunk.last_modified) chunk.last_modified = meta.last_modified;
+        if (!chunk.last_author) chunk.last_author = meta.last_author;
+        if (!chunk.churn) chunk.churn = meta.churn;
+        if (!chunk.churn_added) chunk.churn_added = meta.churn_added;
+        if (!chunk.churn_deleted) chunk.churn_deleted = meta.churn_deleted;
+        if (!chunk.churn_commits) chunk.churn_commits = meta.churn_commits;
+      }
+    }
+
+    const fileRelationsRaw = getArtifact(db, LMDB_ARTIFACT_KEYS.fileRelations);
+    const repoMap = getArtifact(db, LMDB_ARTIFACT_KEYS.repoMap);
+    let fileRelations = null;
+    if (Array.isArray(fileRelationsRaw)) {
+      const map = new Map();
+      for (const entry of fileRelationsRaw) {
+        if (!entry || !entry.file) continue;
+        map.set(entry.file, entry.relations || null);
+      }
+      fileRelations = map;
+    }
+
+    const indexState = getArtifact(db, LMDB_ARTIFACT_KEYS.indexState);
+    const embeddingsState = indexState?.embeddings || null;
+    const embeddingsReady = embeddingsState?.ready !== false && embeddingsState?.pending !== true;
+    const denseVec = embeddingsReady && includeDense
+      ?
getArtifact(db, LMDB_ARTIFACT_KEYS.denseVectors) + : null; + const denseVecDoc = embeddingsReady && includeDense + ? getArtifact(db, LMDB_ARTIFACT_KEYS.denseVectorsDoc) + : null; + const denseVecCode = embeddingsReady && includeDense + ? getArtifact(db, LMDB_ARTIFACT_KEYS.denseVectorsCode) + : null; + if (denseVec && !denseVec.model && modelIdDefault) denseVec.model = modelIdDefault; + if (denseVecDoc && !denseVecDoc.model && modelIdDefault) denseVecDoc.model = modelIdDefault; + if (denseVecCode && !denseVecCode.model && modelIdDefault) denseVecCode.model = modelIdDefault; + const hnswMeta = embeddingsReady && includeDense && includeHnsw && hnswConfig.enabled + ? getArtifact(db, LMDB_ARTIFACT_KEYS.denseHnswMeta) + : null; + let hnswIndex = null; + let hnswAvailable = false; + if (hnswMeta && includeHnsw && hnswConfig.enabled) { + const compatibility = validateHnswMetaCompatibility({ denseVectors: denseVec, hnswMeta }); + if (!compatibility.ok) { + console.warn(`[ann] Skipping HNSW index load due to incompatible metadata: ${compatibility.warnings.join('; ')}`); + } else { + const indexDir = indexDirs?.[mode] || null; + if (indexDir) { + const { indexPath } = resolveHnswPaths(indexDir); + const mergedConfig = { + ...hnswConfig, + space: hnswMeta.space || hnswConfig.space, + efSearch: hnswMeta.efSearch || hnswConfig.efSearch + }; + hnswIndex = loadHnswIndex({ indexPath, dims: hnswMeta.dims, config: mergedConfig }); + hnswAvailable = Boolean(hnswIndex); + } + } + } + + const fieldPostings = getArtifact(db, LMDB_ARTIFACT_KEYS.fieldPostings); + const fieldTokens = getArtifact(db, LMDB_ARTIFACT_KEYS.fieldTokens); + const filterIndexRaw = getArtifact(db, LMDB_ARTIFACT_KEYS.filterIndex); + const idx = { + chunkMeta, + fileRelations, + repoMap, + denseVec, + denseVecDoc, + denseVecCode, + hnsw: hnswMeta ? { + available: hnswAvailable, + index: hnswIndex, + meta: hnswMeta, + space: hnswMeta.space || hnswConfig.space + } : { available: false, index: null, meta: null, space: hnswConfig.space }, + state: indexState, + fieldPostings, + fieldTokens, + minhash: includeMinhash ? getArtifact(db, LMDB_ARTIFACT_KEYS.minhashSignatures) : null, + phraseNgrams: getArtifact(db, LMDB_ARTIFACT_KEYS.phraseNgrams), + chargrams: getArtifact(db, LMDB_ARTIFACT_KEYS.chargramPostings) + }; + if (idx.phraseNgrams?.vocab && !idx.phraseNgrams.vocabIndex) { + idx.phraseNgrams.vocabIndex = new Map(idx.phraseNgrams.vocab.map((term, i) => [term, i])); + } + if (idx.chargrams?.vocab && !idx.chargrams.vocabIndex) { + idx.chargrams.vocabIndex = new Map(idx.chargrams.vocab.map((term, i) => [term, i])); + } + if (idx.fieldPostings?.fields) { + for (const field of Object.keys(idx.fieldPostings.fields)) { + const entry = idx.fieldPostings.fields[field]; + if (!entry?.vocab || entry.vocabIndex) continue; + entry.vocabIndex = new Map(entry.vocab.map((term, i) => [term, i])); + } + } + idx.filterIndex = includeFilterIndex + ? (filterIndexRaw + ? 
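Just above, every postings artifact gets a lazily built `vocabIndex`: the stored vocab array is inverted once into a `Map` so later term lookups are O(1) rather than repeated scans over the array. The same inversion in isolation:

```js
const phraseNgrams = {
  vocab: ['error handler', 'handler registry'],
  postings: [[0, 4], [2]] // chunk ids, parallel to vocab
};
const vocabIndex = new Map(phraseNgrams.vocab.map((term, i) => [term, i]));

const row = vocabIndex.get('handler registry'); // -> 1
const chunkIds = phraseNgrams.postings[row];    // -> [2]
```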
(hydrateFilterIndex(filterIndexRaw) || buildFilterIndex(chunkMeta, { fileChargramN })) + : buildFilterIndex(chunkMeta, { fileChargramN })) + : null; + idx.tokenIndex = getArtifact(db, LMDB_ARTIFACT_KEYS.tokenPostings); + return idx; + } + + return { loadIndexFromLmdb }; +} diff --git a/src/retrieval/output.js b/src/retrieval/output.js new file mode 100644 index 000000000..c85ca9727 --- /dev/null +++ b/src/retrieval/output.js @@ -0,0 +1,4 @@ +export { configureOutputCaches, getOutputCacheReporter } from './output/cache.js'; +export { filterChunks } from './output/filters.js'; +export { cleanContext } from './output/context.js'; +export { formatFullChunk, formatShortChunk } from './output/format.js'; diff --git a/src/retrieval/output/cache.js b/src/retrieval/output/cache.js new file mode 100644 index 000000000..558180312 --- /dev/null +++ b/src/retrieval/output/cache.js @@ -0,0 +1,77 @@ +import { + createCacheReporter, + createLruCache, + DEFAULT_CACHE_MB, + DEFAULT_CACHE_TTL_MS, + estimateStringBytes +} from '../../shared/cache.js'; +import { getEnvConfig } from '../../shared/env.js'; + +const resolveEntryLimit = (raw) => { + const parsed = Number(raw); + return Number.isFinite(parsed) ? Math.max(0, Math.floor(parsed)) : null; +}; + +let outputCacheReporter = createCacheReporter({ enabled: false, log: null }); +let fileTextCache = createLruCache({ + name: 'fileText', + maxMb: DEFAULT_CACHE_MB.fileText, + ttlMs: DEFAULT_CACHE_TTL_MS.fileText, + sizeCalculation: estimateStringBytes, + reporter: outputCacheReporter +}); +let summaryCache = createLruCache({ + name: 'summary', + maxMb: DEFAULT_CACHE_MB.summary, + ttlMs: DEFAULT_CACHE_TTL_MS.summary, + sizeCalculation: estimateStringBytes, + reporter: outputCacheReporter +}); + +export function configureOutputCaches({ cacheConfig = null, verbose = false, log = null } = {}) { + const envConfig = getEnvConfig(); + const entryLimits = { + fileText: resolveEntryLimit(envConfig.fileCacheMax), + summary: resolveEntryLimit(envConfig.summaryCacheMax) + }; + outputCacheReporter = createCacheReporter({ enabled: verbose, log }); + const fileTextConfig = cacheConfig?.fileText || {}; + const summaryConfig = cacheConfig?.summary || {}; + fileTextCache = createLruCache({ + name: 'fileText', + maxMb: Number.isFinite(Number(fileTextConfig.maxMb)) + ? Number(fileTextConfig.maxMb) + : DEFAULT_CACHE_MB.fileText, + ttlMs: Number.isFinite(Number(fileTextConfig.ttlMs)) + ? Number(fileTextConfig.ttlMs) + : DEFAULT_CACHE_TTL_MS.fileText, + maxEntries: entryLimits.fileText, + sizeCalculation: estimateStringBytes, + reporter: outputCacheReporter + }); + summaryCache = createLruCache({ + name: 'summary', + maxMb: Number.isFinite(Number(summaryConfig.maxMb)) + ? Number(summaryConfig.maxMb) + : DEFAULT_CACHE_MB.summary, + ttlMs: Number.isFinite(Number(summaryConfig.ttlMs)) + ? 
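For reference, a hypothetical caller of the `configureOutputCaches` function being assembled here; the option names mirror the code above, while the budgets, TTL, and import path are invented:

```js
import { configureOutputCaches, getOutputCacheReporter } from './output/cache.js';

configureOutputCaches({
  cacheConfig: {
    fileText: { maxMb: 16 },   // larger byte budget for raw file text
    summary: { ttlMs: 60_000 } // expire cached summaries after a minute
  },
  verbose: true,
  log: (line) => console.error(line)
});
const reporter = getOutputCacheReporter(); // shared hit/miss reporter for both caches
```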
Number(summaryConfig.ttlMs) + : DEFAULT_CACHE_TTL_MS.summary, + maxEntries: entryLimits.summary, + sizeCalculation: estimateStringBytes, + reporter: outputCacheReporter + }); + return outputCacheReporter; +} + +export function getOutputCacheReporter() { + return outputCacheReporter; +} + +export function getFileTextCache() { + return fileTextCache; +} + +export function getSummaryCache() { + return summaryCache; +} diff --git a/src/retrieval/output/context.js b/src/retrieval/output/context.js new file mode 100644 index 000000000..0362c0aae --- /dev/null +++ b/src/retrieval/output/context.js @@ -0,0 +1,10 @@ +export function cleanContext(lines) { + return lines + .filter((line) => { + const trimmed = line.trim(); + if (!trimmed || trimmed === '```') return false; + if (!/[a-zA-Z0-9]/.test(trimmed)) return false; + return true; + }) + .map((line) => line.replace(/\s+/g, ' ').trim()); +} diff --git a/src/retrieval/output/explain.js b/src/retrieval/output/explain.js new file mode 100644 index 000000000..05a9b0d8a --- /dev/null +++ b/src/retrieval/output/explain.js @@ -0,0 +1,90 @@ +const formatExplainLine = (label, parts, color) => { + const filtered = parts.filter(Boolean); + if (!filtered.length) return null; + return color.gray(` ${label}: `) + filtered.join(', '); +}; + +export function formatScoreBreakdown(scoreBreakdown, color) { + if (!scoreBreakdown || typeof scoreBreakdown !== 'object') return []; + const lines = []; + const selected = scoreBreakdown.selected || null; + if (selected) { + const parts = []; + if (selected.type) parts.push(`type=${selected.type}`); + if (Number.isFinite(selected.score)) parts.push(`score=${selected.score.toFixed(4)}`); + const line = formatExplainLine('Score', parts, color); + if (line) lines.push(line); + } + const sparse = scoreBreakdown.sparse || null; + if (sparse) { + const parts = []; + if (sparse.type) parts.push(`type=${sparse.type}`); + if (Number.isFinite(sparse.score)) parts.push(`score=${sparse.score.toFixed(4)}`); + if (Number.isFinite(sparse.k1)) parts.push(`k1=${sparse.k1.toFixed(2)}`); + if (Number.isFinite(sparse.b)) parts.push(`b=${sparse.b.toFixed(2)}`); + if (sparse.normalized != null) parts.push(`normalized=${sparse.normalized}`); + if (sparse.profile) parts.push(`profile=${sparse.profile}`); + if (Array.isArray(sparse.weights) && sparse.weights.length) { + const weights = sparse.weights + .map((value) => (Number.isFinite(value) ? 
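The breakdown object this formatter consumes is produced by the retrieval pipeline later in this diff. A representative input and the kind of lines it yields (color wrappers elided):

```js
const breakdown = {
  sparse: { type: 'bm25', score: 7.3124, k1: 1.2, b: 0.75 },
  ann: { score: 0.8421, source: 'hnsw' },
  rrf: { k: 60, sparseRank: 1, annRank: 3, sparseRrf: 1 / 61, annRrf: 1 / 63, score: 1 / 61 + 1 / 63 }
};
// formatScoreBreakdown(breakdown, color) renders roughly:
//   Sparse: type=bm25, score=7.3124, k1=1.20, b=0.75
//   ANN: score=0.8421, source=hnsw
//   RRF: k=60, sparseRank=1, annRank=3, sparseScore=0.0164, annScore=0.0159, score=0.0323
```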
value.toFixed(2) : String(value))) + .join('/'); + parts.push(`weights=${weights}`); + } + const line = formatExplainLine('Sparse', parts, color); + if (line) lines.push(line); + } + const ann = scoreBreakdown.ann || null; + if (ann) { + const parts = []; + if (Number.isFinite(ann.score)) parts.push(`score=${ann.score.toFixed(4)}`); + if (ann.source) parts.push(`source=${ann.source}`); + const line = formatExplainLine('ANN', parts, color); + if (line) lines.push(line); + } + const rrf = scoreBreakdown.rrf || null; + if (rrf) { + const parts = []; + if (Number.isFinite(rrf.k)) parts.push(`k=${rrf.k}`); + if (Number.isFinite(rrf.sparseRank)) parts.push(`sparseRank=${rrf.sparseRank}`); + if (Number.isFinite(rrf.annRank)) parts.push(`annRank=${rrf.annRank}`); + if (Number.isFinite(rrf.sparseRrf)) parts.push(`sparseScore=${rrf.sparseRrf.toFixed(4)}`); + if (Number.isFinite(rrf.annRrf)) parts.push(`annScore=${rrf.annRrf.toFixed(4)}`); + if (Number.isFinite(rrf.score)) parts.push(`score=${rrf.score.toFixed(4)}`); + const line = formatExplainLine('RRF', parts, color); + if (line) lines.push(line); + } + const blend = scoreBreakdown.blend || null; + if (blend) { + const parts = []; + if (Number.isFinite(blend.score)) parts.push(`score=${blend.score.toFixed(4)}`); + if (Number.isFinite(blend.sparseNormalized)) parts.push(`sparseNorm=${blend.sparseNormalized.toFixed(4)}`); + if (Number.isFinite(blend.annNormalized)) parts.push(`annNorm=${blend.annNormalized.toFixed(4)}`); + if (Number.isFinite(blend.sparseWeight) || Number.isFinite(blend.annWeight)) { + const sparseWeight = Number.isFinite(blend.sparseWeight) ? blend.sparseWeight.toFixed(2) : '0.00'; + const annWeight = Number.isFinite(blend.annWeight) ? blend.annWeight.toFixed(2) : '0.00'; + parts.push(`weights=${sparseWeight}/${annWeight}`); + } + const line = formatExplainLine('Blend', parts, color); + if (line) lines.push(line); + } + const phrase = scoreBreakdown.phrase || null; + if (phrase) { + const parts = []; + if (Number.isFinite(phrase.matches)) parts.push(`matches=${phrase.matches}`); + if (Number.isFinite(phrase.boost)) parts.push(`boost=${phrase.boost.toFixed(4)}`); + if (Number.isFinite(phrase.factor)) parts.push(`factor=${phrase.factor.toFixed(2)}`); + const line = formatExplainLine('Phrase', parts, color); + if (line) lines.push(line); + } + const symbol = scoreBreakdown.symbol || null; + if (symbol) { + const parts = []; + if (typeof symbol.definition === 'boolean') parts.push(`definition=${symbol.definition}`); + if (typeof symbol.export === 'boolean') parts.push(`export=${symbol.export}`); + if (Number.isFinite(symbol.factor)) parts.push(`factor=${symbol.factor.toFixed(2)}`); + if (Number.isFinite(symbol.boost)) parts.push(`boost=${symbol.boost.toFixed(4)}`); + const line = formatExplainLine('Symbol', parts, color); + if (line) lines.push(line); + } + return lines; +} diff --git a/src/retrieval/output/filters.js b/src/retrieval/output/filters.js new file mode 100644 index 000000000..6b3d8f4a5 --- /dev/null +++ b/src/retrieval/output/filters.js @@ -0,0 +1,635 @@ +import path from 'node:path'; +import { extractNgrams, tri } from '../../shared/tokenize.js'; +import { createSafeRegex, normalizeSafeRegexConfig } from '../../shared/safe-regex.js'; +import { + bitmapToSet, + createBitmapFromIds, + intersectBitmaps, + intersectSetWithBitmap, + isBitmapEmpty, + isRoaringAvailable, + unionBitmaps +} from '../bitmap.js'; + +/** + * Filter chunk metadata by search constraints. 
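A hypothetical invocation, using filter keys taken from the destructuring below; `/pattern/flags` strings in `file` become safe regexes via `parseFileMatcher`, and the timestamp math is illustrative:

```js
// Keep async function chunks in .ts files under src/ touched in the last week.
const hits = filterChunks(chunkMeta, {
  file: ['/^src\\//'],
  ext: 'ts',                 // normalized to '.ts'
  type: 'FunctionDeclaration',
  async: true,
  modifiedAfter: Date.now() - 7 * 24 * 3600 * 1000
}, filterIndex, fileRelations);
```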
+ * @param {Array} meta + * @param {object} filters + * @returns {Array} + */ +export function filterChunks(meta, filters = {}, filterIndex = null, fileRelations = null) { + const { + type, + author, + importName, + lint, + churn, + calls, + uses, + signature, + param, + decorator, + returnType, + throws, + reads, + writes, + mutates, + alias, + risk, + riskTag, + riskSource, + riskSink, + riskCategory, + riskFlow, + structPack, + structRule, + structTag, + awaits, + branches, + loops, + breaks, + continues, + inferredType, + visibility, + extends: extendsFilter, + async: asyncOnly, + generator: generatorOnly, + returns: returnsOnly, + file, + caseFile, + caseTokens, + ext, + meta: metaFilter, + chunkAuthor, + modifiedAfter, + excludeTokens, + excludePhrases, + excludePhraseRange + } = filters; + const normalize = (value) => String(value || '').toLowerCase(); + const normalizeFilePath = (value) => String(value || '').replace(/\\/g, '/'); + const normalizeFile = (value) => ( + caseFile ? normalizeFilePath(value) : normalize(normalizeFilePath(value)) + ); + const normalizeFilePrefilter = (value) => normalizeFilePath(value).toLowerCase(); + const safeRegexConfig = normalizeSafeRegexConfig(filters.regexConfig || {}, { + flags: caseFile ? '' : 'i' + }); + const normalizeList = (value) => { + if (!value) return []; + const entries = Array.isArray(value) ? value : [value]; + return entries + .flatMap((entry) => String(entry || '').split(/[,\s]+/)) + .map((entry) => entry.trim()) + .filter(Boolean); + }; + const parseFileMatcher = (entry) => { + const raw = String(entry || '').trim(); + if (!raw) return null; + const regexMatch = raw.match(/^\/(.+)\/([a-z]*)$/i); + if (regexMatch) { + const pattern = regexMatch[1]; + const flags = regexMatch[2] || ''; + const matcher = createSafeRegex(pattern, flags, safeRegexConfig); + if (matcher) return { type: 'regex', value: matcher }; + return { type: 'substring', value: normalizeFile(raw) }; + } + return { type: 'substring', value: normalizeFile(raw) }; + }; + const fileMatchers = normalizeList(file).map(parseFileMatcher).filter(Boolean); + const filePrefilterConfig = filters.filePrefilter || {}; + const filePrefilterEnabled = filePrefilterConfig.enabled !== false; + const fileChargramN = Number.isFinite(Number(filePrefilterConfig.chargramN)) + ? Math.max(2, Math.floor(Number(filePrefilterConfig.chargramN))) + : (filterIndex?.fileChargramN || 3); + const extNeedles = normalizeList(ext) + .map((entry) => { + let value = entry.toLowerCase(); + value = value.replace(/^\*+/, ''); + if (value && !value.startsWith('.')) value = `.${value}`; + return value; + }) + .filter(Boolean); + const typeNeedles = normalizeList(type).map(normalize); + const authorNeedles = normalizeList(author).map(normalize); + const metaFilters = Array.isArray(metaFilter) ? metaFilter : (metaFilter ? [metaFilter] : []); + const excludeNeedles = normalizeList(excludeTokens).map((value) => (caseTokens ? String(value || '') : normalize(value))); + const excludePhraseNeedles = normalizeList(excludePhrases).map((value) => (caseTokens ? String(value || '') : normalize(value))); + const structPackNeedles = normalizeList(structPack).map(normalize); + const structRuleNeedles = normalizeList(structRule).map(normalize); + const structTagNeedles = normalizeList(structTag).map(normalize); + const roaringAvailable = isRoaringAvailable(); + const bitmapIndex = filterIndex?.bitmap || null; + const buildCandidate = (sets, bitmaps) => { + const setList = Array.isArray(sets) ? 
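The `buildCandidate` helper opening here normalizes mixed inputs into either a single roaring bitmap or a single plain `Set`. Its Set-only fallback, when roaring support is unavailable, is an ordinary union:

```js
const sets = [new Set([1, 2]), new Set([2, 3])];
const out = new Set();
for (const set of sets) {
  for (const id of set) out.add(id);
}
// out -> Set { 1, 2, 3 }; with roaring available the same union stays in
// compressed bitmap space via createBitmapFromIds/unionBitmaps.
```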
sets.filter(Boolean) : []; + const bitmapList = Array.isArray(bitmaps) ? bitmaps.filter(Boolean) : []; + if (!setList.length && !bitmapList.length) return null; + if (roaringAvailable) { + let bitmap = bitmapList.length ? unionBitmaps(bitmapList) : null; + if (setList.length) { + const ids = []; + for (const set of setList) { + for (const id of set) ids.push(id); + } + const extraBitmap = createBitmapFromIds(ids, { force: true }); + if (extraBitmap) { + bitmap = bitmap ? unionBitmaps([bitmap, extraBitmap]) : extraBitmap; + } + } + if (bitmap) return { bitmap }; + } + const out = new Set(); + for (const set of setList) { + for (const id of set) out.add(id); + } + for (const bitmap of bitmapList) { + for (const id of bitmapToSet(bitmap)) out.add(id); + } + return { set: out }; + }; + const mergeCandidates = (candidates) => { + if (!Array.isArray(candidates) || !candidates.length) return null; + const sets = []; + const bitmaps = []; + for (const candidate of candidates) { + if (!candidate) continue; + if (candidate.bitmap) bitmaps.push(candidate.bitmap); + if (candidate.set) sets.push(candidate.set); + } + return buildCandidate(sets, bitmaps); + }; + const collectExactMatches = (map, values, bitmapMap = null) => { + if (!map || !values.length) return null; + const sets = []; + const bitmaps = []; + for (const value of values) { + if (!value) continue; + const set = map.get(value); + if (!set) continue; + const bitmap = bitmapMap ? bitmapMap.get(value) : null; + if (bitmap) { + bitmaps.push(bitmap); + } else { + sets.push(set); + } + } + if (!sets.length && !bitmaps.length) return { set: new Set() }; + return buildCandidate(sets, bitmaps); + }; + const collectSubstringMatches = (map, needle, bitmapMap = null) => { + if (!map || !needle) return null; + const sets = []; + const bitmaps = []; + for (const [key, set] of map.entries()) { + if (!key.includes(needle)) continue; + const bitmap = bitmapMap ? 
bitmapMap.get(key) : null; + if (bitmap) { + bitmaps.push(bitmap); + } else { + sets.push(set); + } + } + if (!sets.length && !bitmaps.length) return { set: new Set() }; + return buildCandidate(sets, bitmaps); + }; + const collectAnySubstringMatches = (map, values, bitmapMap = null) => { + if (!map || !values.length) return null; + const candidates = values + .map((value) => collectSubstringMatches(map, value, bitmapMap)) + .filter(Boolean); + return mergeCandidates(candidates); + }; + const intersectSets = (sets) => { + if (!sets.length) return null; + let acc = sets[0]; + for (let i = 1; i < sets.length; i += 1) { + const next = sets[i]; + const merged = new Set(); + for (const id of acc) { + if (next.has(id)) merged.add(id); + } + acc = merged; + if (!acc.size) break; + } + return acc; + }; + const intersectTwoSets = (left, right) => { + if (!left || !right) return new Set(); + const out = new Set(); + for (const id of left) { + if (right.has(id)) out.add(id); + } + return out; + }; + const intersectCandidates = (candidates) => { + if (!Array.isArray(candidates) || !candidates.length) return null; + const sets = []; + const bitmaps = []; + for (const candidate of candidates) { + if (!candidate) continue; + if (candidate.set) { + if (!candidate.set.size) return new Set(); + sets.push(candidate.set); + } + if (candidate.bitmap) { + if (isBitmapEmpty(candidate.bitmap)) return new Set(); + bitmaps.push(candidate.bitmap); + } + } + if (bitmaps.length) { + let bitmap = intersectBitmaps(bitmaps); + if (!bitmap || isBitmapEmpty(bitmap)) return new Set(); + if (sets.length) { + const setIntersection = intersectSets(sets); + if (!setIntersection || !setIntersection.size) return new Set(); + const setBitmap = createBitmapFromIds(setIntersection, { force: true }); + if (setBitmap) { + bitmap = intersectBitmaps([bitmap, setBitmap]); + return bitmap ? bitmapToSet(bitmap) : new Set(); + } + return intersectSetWithBitmap(setIntersection, bitmap); + } + return bitmapToSet(bitmap); + } + return intersectSets(sets); + }; + const extractRegexLiteral = (pattern) => { + let best = ''; + let current = ''; + let escaped = false; + for (const ch of pattern) { + if (escaped) { + current += ch; + escaped = false; + continue; + } + if (ch === '\\') { + escaped = true; + continue; + } + if ('^$.*+?()[]{}|'.includes(ch)) { + if (current.length > best.length) best = current; + current = ''; + continue; + } + current += ch; + } + if (current.length > best.length) best = current; + return best; + }; + const collectFilePrefilterMatches = () => { + if (!fileMatchers.length || !filterIndex || !filterIndex.fileChargrams || !filterIndex.fileChunksById) { + return null; + } + const fileIds = new Set(); + for (const matcher of fileMatchers) { + let needle = null; + if (matcher.type === 'substring') { + needle = normalizeFilePrefilter(matcher.value); + } else if (matcher.type === 'regex') { + const literal = extractRegexLiteral(matcher.value.source || ''); + needle = literal ? normalizeFilePrefilter(literal) : null; + } + if (!needle || needle.length < fileChargramN) continue; + const grams = tri(needle, fileChargramN); + if (!grams.length) continue; + let candidateFiles = null; + for (const gram of grams) { + const bucket = filterIndex.fileChargrams.get(gram); + if (!bucket) { + candidateFiles = new Set(); + break; + } + candidateFiles = candidateFiles ? 
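This loop is the heart of the chargram prefilter: a file stays a candidate only if every n-gram of the needle hits its bucket. A self-contained sketch with a hand-rolled trigram helper (`tri` from `shared/tokenize.js` is assumed to emit sliding n-grams like this):

```js
const tri3 = (s) => Array.from({ length: Math.max(0, s.length - 2) }, (_, i) => s.slice(i, i + 3));

const fileChargrams = new Map([
  ['src', new Set([0])], ['rc/', new Set([0])], ['c/a', new Set([0])],
  ['/ap', new Set([0])], ['app', new Set([0, 1])]
]);
let candidateFiles = null;
for (const gram of tri3('src/app')) {
  const bucket = fileChargrams.get(gram);
  if (!bucket) { candidateFiles = new Set(); break; } // any miss disqualifies
  candidateFiles = candidateFiles
    ? new Set([...candidateFiles].filter((id) => bucket.has(id)))
    : new Set(bucket);
}
// candidateFiles -> Set { 0 }: only file 0 contains every trigram of 'src/app'.
```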
intersectTwoSets(candidateFiles, bucket) : new Set(bucket); + if (!candidateFiles.size) break; + } + if (!candidateFiles || !candidateFiles.size) continue; + for (const fileId of candidateFiles) { + fileIds.add(fileId); + } + } + if (!fileIds.size) return null; + const chunkIds = new Set(); + for (const fileId of fileIds) { + const chunks = filterIndex.fileChunksById[fileId]; + if (!chunks) continue; + for (const id of chunks) chunkIds.add(id); + } + return chunkIds; + }; + const matchList = (list, value) => { + if (!value) return true; + if (!Array.isArray(list)) return false; + const needle = normalize(value); + return list.some((entry) => normalize(entry).includes(needle)); + }; + const matchInferredType = (inferred, value) => { + if (!value) return true; + if (!inferred) return false; + const needle = normalize(value); + const types = []; + const collect = (entries) => { + if (!Array.isArray(entries)) return; + for (const entry of entries) { + if (entry?.type) types.push(entry.type); + } + }; + const collectMap = (map) => { + if (!map || typeof map !== 'object') return; + Object.values(map).forEach((entries) => collect(entries)); + }; + collectMap(inferred.params); + collectMap(inferred.fields); + collectMap(inferred.locals); + collect(inferred.returns); + if (!types.length) return false; + return types.some((entry) => normalize(entry).includes(needle)); + }; + const matchStructural = (chunk) => { + if (!structPackNeedles.length && !structRuleNeedles.length && !structTagNeedles.length) { + return true; + } + const structural = chunk?.docmeta?.structural; + if (!Array.isArray(structural) || !structural.length) return false; + return structural.some((entry) => { + if (structPackNeedles.length) { + const packValue = normalize(entry?.pack || ''); + if (!structPackNeedles.some((needle) => packValue.includes(needle))) return false; + } + if (structRuleNeedles.length) { + const ruleValue = normalize(entry?.ruleId || ''); + if (!structRuleNeedles.some((needle) => ruleValue.includes(needle))) return false; + } + if (structTagNeedles.length) { + const tags = Array.isArray(entry?.tags) ? entry.tags : []; + if (!tags.some((tag) => + structTagNeedles.some((needle) => normalize(tag).includes(needle)) + )) { + return false; + } + } + return true; + }); + }; + const truthy = (value) => value === true; + const resolveMetaField = (record, key) => { + if (!record || typeof record !== 'object' || !key) return undefined; + if (!key.includes('.')) return record[key]; + return key.split('.').reduce((acc, part) => (acc && typeof acc === 'object' ? 
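`resolveMetaField` (completing here) walks dotted keys so triage meta filters can address nested record fields. For instance, with an invented record shape:

```js
const record = { package: { name: 'lodash', ecosystem: 'npm' } };
const resolve = (rec, key) => key.split('.')
  .reduce((acc, part) => (acc && typeof acc === 'object' ? acc[part] : undefined), rec);

resolve(record, 'package.name');    // -> 'lodash'
resolve(record, 'package.missing'); // -> undefined, so that filter rejects the chunk
```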
acc[part] : undefined), record); + }; + const matchMetaFilters = (chunk) => { + if (!metaFilters.length) return true; + const recordMeta = chunk?.docmeta?.record; + if (!recordMeta || typeof recordMeta !== 'object') return false; + for (const filter of metaFilters) { + const key = filter?.key; + if (!key) continue; + const value = filter?.value; + const field = resolveMetaField(recordMeta, key); + if (value == null || value === '') { + if (field == null) return false; + if (Array.isArray(field) && field.length === 0) return false; + if (typeof field === 'string' && !field.trim()) return false; + continue; + } + const needle = normalize(value); + if (Array.isArray(field)) { + if (!field.some((entry) => normalize(entry).includes(needle))) return false; + } else if (field && typeof field === 'object') { + if (!normalize(JSON.stringify(field)).includes(needle)) return false; + } else if (!normalize(field).includes(needle)) { + return false; + } + } + return true; + }; + const resolveFileRelations = (file) => { + if (!file || !fileRelations) return null; + if (typeof fileRelations.get === 'function') { + return fileRelations.get(file) || null; + } + return fileRelations[file] || null; + }; + const normalizeToken = caseTokens ? (value) => String(value || '') : normalize; + + const indexedCandidates = []; + if (filterIndex) { + if (extNeedles.length && filterIndex.byExt) { + const candidate = collectExactMatches( + filterIndex.byExt, + extNeedles, + bitmapIndex?.byExt + ); + if (candidate) indexedCandidates.push(candidate); + } + if (typeNeedles.length && filterIndex.byKind) { + const candidate = collectExactMatches( + filterIndex.byKind, + typeNeedles, + bitmapIndex?.byKind + ); + if (candidate) indexedCandidates.push(candidate); + } + if (authorNeedles.length && filterIndex.byAuthor) { + const candidate = collectAnySubstringMatches( + filterIndex.byAuthor, + authorNeedles, + bitmapIndex?.byAuthor + ); + if (candidate) indexedCandidates.push(candidate); + } + if (chunkAuthor && filterIndex.byChunkAuthor) { + const candidate = collectSubstringMatches( + filterIndex.byChunkAuthor, + normalize(chunkAuthor), + bitmapIndex?.byChunkAuthor + ); + if (candidate) indexedCandidates.push(candidate); + } + if (visibility && filterIndex.byVisibility) { + const candidate = collectSubstringMatches( + filterIndex.byVisibility, + normalize(visibility), + bitmapIndex?.byVisibility + ); + if (candidate) indexedCandidates.push(candidate); + } + if (fileMatchers.length && filePrefilterEnabled) { + const filePrefilterIds = collectFilePrefilterMatches(); + if (filePrefilterIds) { + const candidate = buildCandidate([filePrefilterIds], []); + if (candidate) indexedCandidates.push(candidate); + } + } + } + const candidateIds = indexedCandidates.length + ? intersectCandidates(indexedCandidates) + : null; + const sourceMeta = candidateIds + ? 
Array.from(candidateIds).map((id) => meta[id]).filter(Boolean) + : meta; + + return sourceMeta.filter((c) => { + if (!c) return false; + if (fileMatchers.length) { + const fileValue = String(c.file || ''); + const fileValueNorm = normalizeFile(fileValue); + const matches = fileMatchers.some((matcher) => { + if (matcher.type === 'regex') { + matcher.value.lastIndex = 0; + return matcher.value.test(fileValue); + } + return fileValueNorm.includes(matcher.value); + }); + if (!matches) return false; + } + if (extNeedles.length) { + const extValue = normalize(c.ext || path.extname(c.file || '')); + if (!extNeedles.includes(extValue)) return false; + } + if (!matchMetaFilters(c)) return false; + if (excludeNeedles.length || excludePhraseNeedles.length) { + const tokens = Array.isArray(c.tokens) ? c.tokens : []; + let ngrams = Array.isArray(c.ngrams) ? c.ngrams : null; + if (!ngrams && excludePhraseNeedles.length && tokens.length && excludePhraseRange?.min && excludePhraseRange?.max) { + ngrams = extractNgrams(tokens, excludePhraseRange.min, excludePhraseRange.max); + } + const tokenSet = new Set(tokens.map(normalizeToken)); + const ngramSet = new Set((ngrams || []).map(normalizeToken)); + const tokenMatch = excludeNeedles.some((needle) => tokenSet.has(needle) || ngramSet.has(needle)); + if (tokenMatch) return false; + if (excludePhraseNeedles.some((needle) => ngramSet.has(needle))) return false; + } + if (modifiedAfter != null) { + const lastModified = c.last_modified ? Date.parse(c.last_modified) : NaN; + if (!Number.isFinite(lastModified) || lastModified < modifiedAfter) return false; + } + if (typeNeedles.length) { + const kindValue = c.kind; + if (!kindValue) return false; + const kinds = Array.isArray(kindValue) ? kindValue : [kindValue]; + const matches = kinds.some((entry) => typeNeedles.includes(normalize(entry))); + if (!matches) return false; + } + if (authorNeedles.length) { + const authorValue = c.last_author; + if (!authorValue) return false; + const authors = Array.isArray(authorValue) ? 
authorValue : [authorValue]; + const matches = authorNeedles.some((needle) => + authors.some((entry) => normalize(entry).includes(needle)) + ); + if (!matches) return false; + } + if (chunkAuthor && !matchList(c.chunk_authors, chunkAuthor)) return false; + if (importName) { + const imports = c.codeRelations?.imports || resolveFileRelations(c.file)?.imports; + if (!Array.isArray(imports) || !imports.includes(importName)) return false; + } + if (lint && (!c.lint || !c.lint.length)) return false; + if (churn !== null && churn !== undefined) { + const churnValue = Number(c.churn); + if (!Number.isFinite(churnValue) || churnValue < churn) return false; + } + if (calls) { + const callsList = c.codeRelations?.calls; + if (!Array.isArray(callsList)) return false; + const found = callsList.find(([fn, callName]) => fn === calls || callName === calls); + if (!found) return false; + } + if (uses) { + const usages = c.codeRelations?.usages || resolveFileRelations(c.file)?.usages; + if (!Array.isArray(usages)) return false; + if (!usages.includes(uses)) return false; + } + if (signature) { + const sig = c.docmeta?.signature; + if (!sig) return false; + if (!sig.includes(signature)) return false; + } + if (param) { + const params = c.docmeta?.params; + if (!Array.isArray(params)) return false; + if (!params.includes(param)) return false; + } + if (decorator && !matchList(c.docmeta?.decorators, decorator)) return false; + if (returnType) { + const foundReturnType = c.docmeta?.returnType || null; + if (!foundReturnType || !normalize(foundReturnType).includes(normalize(returnType))) { + return false; + } + } + if (inferredType && !matchInferredType(c.docmeta?.inferredTypes, inferredType)) { + return false; + } + if (throws && !matchList(c.docmeta?.throws, throws)) return false; + if (awaits && !matchList(c.docmeta?.awaits, awaits)) return false; + if (reads && !matchList(c.docmeta?.dataflow?.reads, reads)) return false; + if (writes && !matchList(c.docmeta?.dataflow?.writes, writes)) return false; + if (mutates && !matchList(c.docmeta?.dataflow?.mutations, mutates)) return false; + if (alias && !matchList(c.docmeta?.dataflow?.aliases, alias)) return false; + const riskMeta = c.docmeta?.risk || null; + const riskTagValue = riskTag || risk; + if (riskTagValue && !matchList(riskMeta?.tags, riskTagValue)) return false; + if (riskSource) { + const sourceNames = Array.isArray(riskMeta?.sources) + ? riskMeta.sources.map((source) => source.name) + : null; + if (!matchList(sourceNames, riskSource)) return false; + } + if (riskSink) { + const sinkNames = Array.isArray(riskMeta?.sinks) + ? riskMeta.sinks.map((sink) => sink.name) + : null; + if (!matchList(sinkNames, riskSink)) return false; + } + if (riskCategory) { + const categories = Array.isArray(riskMeta?.categories) + ? riskMeta.categories + : (Array.isArray(riskMeta?.sinks) ? riskMeta.sinks.map((sink) => sink.category) : null); + if (!matchList(categories, riskCategory)) return false; + } + if (riskFlow) { + const flows = Array.isArray(riskMeta?.flows) + ? 
riskMeta.flows.map((flow) => `${flow.source}->${flow.sink}`) + : null; + if (!matchList(flows, riskFlow)) return false; + } + if (!matchStructural(c)) return false; + if (branches != null) { + const count = c.docmeta?.controlFlow?.branches; + if (!Number.isFinite(count) || count < branches) return false; + } + if (loops != null) { + const count = c.docmeta?.controlFlow?.loops; + if (!Number.isFinite(count) || count < loops) return false; + } + if (breaks != null) { + const count = c.docmeta?.controlFlow?.breaks; + if (!Number.isFinite(count) || count < breaks) return false; + } + if (continues != null) { + const count = c.docmeta?.controlFlow?.continues; + if (!Number.isFinite(count) || count < continues) return false; + } + if (visibility) { + const docVisibility = c.docmeta?.visibility || c.docmeta?.modifiers?.visibility || null; + if (!docVisibility || !normalize(docVisibility).includes(normalize(visibility))) { + return false; + } + } + if (extendsFilter) { + const parents = c.docmeta?.extends || c.docmeta?.bases || []; + if (!matchList(parents, extendsFilter)) return false; + } + if (truthy(asyncOnly)) { + if (!(c.docmeta?.async || c.docmeta?.modifiers?.async)) return false; + } + if (truthy(generatorOnly)) { + if (!(c.docmeta?.modifiers?.generator || c.docmeta?.yields)) return false; + } + if (truthy(returnsOnly)) { + if (!(c.docmeta?.returnsValue || c.docmeta?.returns)) return false; + } + return true; + }); +} diff --git a/src/search/output.js b/src/retrieval/output/format.js similarity index 54% rename from src/search/output.js rename to src/retrieval/output/format.js index a3f44c44b..17f756e6b 100644 --- a/src/search/output.js +++ b/src/retrieval/output/format.js @@ -1,335 +1,6 @@ -import fs from 'node:fs'; -import path from 'node:path'; -import { extractNgrams } from '../shared/tokenize.js'; - -const fileTextCache = new Map(); -const summaryCache = new Map(); - -/** - * Filter chunk metadata by search constraints. - * @param {Array} meta - * @param {object} filters - * @returns {Array} - */ -export function filterChunks(meta, filters = {}) { - const { - type, - author, - importName, - lint, - churn, - calls, - uses, - signature, - param, - decorator, - returnType, - throws, - reads, - writes, - mutates, - alias, - risk, - riskTag, - riskSource, - riskSink, - riskCategory, - riskFlow, - awaits, - branches, - loops, - breaks, - continues, - inferredType, - visibility, - extends: extendsFilter, - async: asyncOnly, - generator: generatorOnly, - returns: returnsOnly, - file, - ext, - meta: metaFilter, - chunkAuthor, - modifiedAfter, - excludeTokens, - excludePhrases, - excludePhraseRange - } = filters; - const normalize = (value) => String(value || '').toLowerCase(); - const normalizeList = (value) => { - if (!value) return []; - const entries = Array.isArray(value) ? 
value : [value]; - return entries - .flatMap((entry) => String(entry || '').split(/[,\s]+/)) - .map((entry) => entry.trim()) - .filter(Boolean); - }; - const parseFileMatcher = (entry) => { - const raw = String(entry || '').trim(); - if (!raw) return null; - const regexMatch = raw.match(/^\/(.+)\/([a-z]*)$/i); - if (regexMatch) { - const pattern = regexMatch[1]; - let flags = regexMatch[2] || ''; - if (!flags.includes('i')) flags += 'i'; - try { - return { type: 'regex', value: new RegExp(pattern, flags) }; - } catch { - return { type: 'substring', value: normalize(raw) }; - } - } - return { type: 'substring', value: normalize(raw) }; - }; - const fileMatchers = normalizeList(file).map(parseFileMatcher).filter(Boolean); - const extNeedles = normalizeList(ext) - .map((entry) => { - let value = entry.toLowerCase(); - value = value.replace(/^\*+/, ''); - if (value && !value.startsWith('.')) value = `.${value}`; - return value; - }) - .filter(Boolean); - const metaFilters = Array.isArray(metaFilter) ? metaFilter : (metaFilter ? [metaFilter] : []); - const excludeNeedles = normalizeList(excludeTokens).map(normalize); - const excludePhraseNeedles = normalizeList(excludePhrases).map(normalize); - const matchList = (list, value) => { - if (!value) return true; - if (!Array.isArray(list)) return false; - const needle = normalize(value); - return list.some((entry) => normalize(entry).includes(needle)); - }; - const matchInferredType = (inferred, value) => { - if (!value) return true; - if (!inferred) return false; - const needle = normalize(value); - const types = []; - const collect = (entries) => { - if (!Array.isArray(entries)) return; - for (const entry of entries) { - if (entry?.type) types.push(entry.type); - } - }; - const collectMap = (map) => { - if (!map || typeof map !== 'object') return; - Object.values(map).forEach((entries) => collect(entries)); - }; - collectMap(inferred.params); - collectMap(inferred.fields); - collectMap(inferred.locals); - collect(inferred.returns); - if (!types.length) return false; - return types.some((entry) => normalize(entry).includes(needle)); - }; - const truthy = (value) => value === true; - const resolveMetaField = (record, key) => { - if (!record || typeof record !== 'object' || !key) return undefined; - if (!key.includes('.')) return record[key]; - return key.split('.').reduce((acc, part) => (acc && typeof acc === 'object' ? 
acc[part] : undefined), record); - }; - const matchMetaFilters = (chunk) => { - if (!metaFilters.length) return true; - const recordMeta = chunk?.docmeta?.record; - if (!recordMeta || typeof recordMeta !== 'object') return false; - for (const filter of metaFilters) { - const key = filter?.key; - if (!key) continue; - const value = filter?.value; - const field = resolveMetaField(recordMeta, key); - if (value == null || value === '') { - if (field == null) return false; - if (Array.isArray(field) && field.length === 0) return false; - if (typeof field === 'string' && !field.trim()) return false; - continue; - } - const needle = normalize(value); - if (Array.isArray(field)) { - if (!field.some((entry) => normalize(entry).includes(needle))) return false; - } else if (field && typeof field === 'object') { - if (!normalize(JSON.stringify(field)).includes(needle)) return false; - } else if (!normalize(field).includes(needle)) { - return false; - } - } - return true; - }; - - return meta.filter((c) => { - if (!c) return false; - if (fileMatchers.length) { - const fileValue = String(c.file || ''); - const fileValueLower = normalize(fileValue); - const matches = fileMatchers.some((matcher) => { - if (matcher.type === 'regex') { - matcher.value.lastIndex = 0; - return matcher.value.test(fileValue); - } - return fileValueLower.includes(matcher.value); - }); - if (!matches) return false; - } - if (extNeedles.length) { - const extValue = normalize(c.ext || path.extname(c.file || '')); - if (!extNeedles.includes(extValue)) return false; - } - if (!matchMetaFilters(c)) return false; - if (excludeNeedles.length || excludePhraseNeedles.length) { - const tokens = Array.isArray(c.tokens) ? c.tokens : []; - let ngrams = Array.isArray(c.ngrams) ? c.ngrams : null; - if (!ngrams && excludePhraseNeedles.length && tokens.length && excludePhraseRange?.min && excludePhraseRange?.max) { - ngrams = extractNgrams(tokens, excludePhraseRange.min, excludePhraseRange.max); - } - const tokenSet = new Set(tokens.map(normalize)); - const ngramSet = new Set((ngrams || []).map(normalize)); - const tokenMatch = excludeNeedles.some((needle) => tokenSet.has(needle) || ngramSet.has(needle)); - if (tokenMatch) return false; - if (excludePhraseNeedles.some((needle) => ngramSet.has(needle))) return false; - } - if (modifiedAfter != null) { - const lastModified = c.last_modified ? 
Date.parse(c.last_modified) : NaN; - if (!Number.isFinite(lastModified) || lastModified < modifiedAfter) return false; - } - if (type && c.kind && c.kind.toLowerCase() !== type.toLowerCase()) return false; - if (author && c.last_author && !c.last_author.toLowerCase().includes(author.toLowerCase())) return false; - if (chunkAuthor && !matchList(c.chunk_authors, chunkAuthor)) return false; - if (importName && c.codeRelations && c.codeRelations.imports) { - if (!c.codeRelations.imports.includes(importName)) return false; - } - if (lint && (!c.lint || !c.lint.length)) return false; - if (churn !== null && churn !== undefined) { - const churnValue = Number(c.churn); - if (!Number.isFinite(churnValue) || churnValue < churn) return false; - } - if (calls && c.codeRelations && c.codeRelations.calls) { - const found = c.codeRelations.calls.find(([fn, callName]) => fn === calls || callName === calls); - if (!found) return false; - } - if (uses && c.codeRelations && c.codeRelations.usages) { - if (!c.codeRelations.usages.includes(uses)) return false; - } - if (signature && c.docmeta?.signature) { - if (!c.docmeta.signature.includes(signature)) return false; - } - if (param && c.docmeta?.params) { - if (!c.docmeta.params.includes(param)) return false; - } - if (decorator && !matchList(c.docmeta?.decorators, decorator)) return false; - if (returnType) { - const foundReturnType = c.docmeta?.returnType || null; - if (!foundReturnType || !normalize(foundReturnType).includes(normalize(returnType))) { - return false; - } - } - if (inferredType && !matchInferredType(c.docmeta?.inferredTypes, inferredType)) { - return false; - } - if (throws && !matchList(c.docmeta?.throws, throws)) return false; - if (awaits && !matchList(c.docmeta?.awaits, awaits)) return false; - if (reads && !matchList(c.docmeta?.dataflow?.reads, reads)) return false; - if (writes && !matchList(c.docmeta?.dataflow?.writes, writes)) return false; - if (mutates && !matchList(c.docmeta?.dataflow?.mutations, mutates)) return false; - if (alias && !matchList(c.docmeta?.dataflow?.aliases, alias)) return false; - const riskMeta = c.docmeta?.risk || null; - const riskTagValue = riskTag || risk; - if (riskTagValue && !matchList(riskMeta?.tags, riskTagValue)) return false; - if (riskSource) { - const sourceNames = Array.isArray(riskMeta?.sources) - ? riskMeta.sources.map((source) => source.name) - : null; - if (!matchList(sourceNames, riskSource)) return false; - } - if (riskSink) { - const sinkNames = Array.isArray(riskMeta?.sinks) - ? riskMeta.sinks.map((sink) => sink.name) - : null; - if (!matchList(sinkNames, riskSink)) return false; - } - if (riskCategory) { - const categories = Array.isArray(riskMeta?.categories) - ? riskMeta.categories - : (Array.isArray(riskMeta?.sinks) ? riskMeta.sinks.map((sink) => sink.category) : null); - if (!matchList(categories, riskCategory)) return false; - } - if (riskFlow) { - const flows = Array.isArray(riskMeta?.flows) - ? 
riskMeta.flows.map((flow) => `${flow.source}->${flow.sink}`) - : null; - if (!matchList(flows, riskFlow)) return false; - } - if (branches != null) { - const count = c.docmeta?.controlFlow?.branches; - if (!Number.isFinite(count) || count < branches) return false; - } - if (loops != null) { - const count = c.docmeta?.controlFlow?.loops; - if (!Number.isFinite(count) || count < loops) return false; - } - if (breaks != null) { - const count = c.docmeta?.controlFlow?.breaks; - if (!Number.isFinite(count) || count < breaks) return false; - } - if (continues != null) { - const count = c.docmeta?.controlFlow?.continues; - if (!Number.isFinite(count) || count < continues) return false; - } - if (visibility) { - const docVisibility = c.docmeta?.visibility || c.docmeta?.modifiers?.visibility || null; - if (!docVisibility || !normalize(docVisibility).includes(normalize(visibility))) { - return false; - } - } - if (extendsFilter) { - const parents = c.docmeta?.extends || c.docmeta?.bases || []; - if (!matchList(parents, extendsFilter)) return false; - } - if (truthy(asyncOnly)) { - if (!(c.docmeta?.async || c.docmeta?.modifiers?.async)) return false; - } - if (truthy(generatorOnly)) { - if (!(c.docmeta?.modifiers?.generator || c.docmeta?.yields)) return false; - } - if (truthy(returnsOnly)) { - if (!(c.docmeta?.returnsValue || c.docmeta?.returns)) return false; - } - return true; - }); -} - -/** - * Normalize context lines for display. - * @param {string[]} lines - * @returns {string[]} - */ -export function cleanContext(lines) { - return lines - .filter((line) => { - const trimmed = line.trim(); - if (!trimmed || trimmed === '```') return false; - if (!/[a-zA-Z0-9]/.test(trimmed)) return false; - return true; - }) - .map((line) => line.replace(/\s+/g, ' ').trim()); -} - -function getBodySummary(rootDir, chunk, maxWords = 80) { - try { - const absPath = path.join(rootDir, chunk.file); - const cacheKey = `${absPath}:${chunk.start}:${chunk.end}:${maxWords}`; - if (summaryCache.has(cacheKey)) return summaryCache.get(cacheKey); - let text = fileTextCache.get(absPath); - if (!text) { - text = fs.readFileSync(absPath, 'utf8'); - fileTextCache.set(absPath, text); - } - const chunkText = text.slice(chunk.start, chunk.end) - .replace(/\s+/g, ' ') - .trim(); - const words = chunkText.split(/\s+/).slice(0, maxWords).join(' '); - summaryCache.set(cacheKey, words); - return words; - } catch { - return '(Could not load summary)'; - } -} +import { cleanContext } from './context.js'; +import { formatScoreBreakdown } from './explain.js'; +import { getBodySummary } from './summary.js'; const formatInferredEntry = (entry) => { if (!entry?.type) return ''; @@ -371,6 +42,7 @@ export function formatFullChunk({ mode, score, scoreType, + explain = false, color, queryTokens = [], rx, @@ -398,6 +70,13 @@ export function formatFullChunk({ out += line1 + '\n'; + if (explain && chunk.scoreBreakdown) { + const explainLines = formatScoreBreakdown(chunk.scoreBreakdown, c); + if (explainLines.length) { + out += explainLines.join('\n') + '\n'; + } + } + const headlinePart = chunk.headline ? c.bold('Headline: ') + c.underline(chunk.headline) : ''; const lastModPart = chunk.last_modified ? 
c.gray('Last Modified: ') + c.bold(chunk.last_modified) : ''; const secondLine = [headlinePart, lastModPart].filter(Boolean).join(' '); @@ -438,11 +117,31 @@ export function formatFullChunk({ out += c.yellow(' CallSummary: ') + summaries.join(', ') + '\n'; } - if (chunk.codeRelations?.importLinks?.length) { + if (chunk.importLinks?.length) { + out += c.green(' ImportLinks: ') + chunk.importLinks.join(', ') + '\n'; + } else if (chunk.codeRelations?.importLinks?.length) { out += c.green(' ImportLinks: ') + chunk.codeRelations.importLinks.join(', ') + '\n'; } - if (chunk.codeRelations?.usages?.length) { + if (chunk.usages?.length) { + const usageFreq = Object.create(null); + chunk.usages.forEach((raw) => { + const trimmed = typeof raw === 'string' ? raw.trim() : ''; + if (!trimmed) return; + usageFreq[trimmed] = (usageFreq[trimmed] || 0) + 1; + }); + + const usageEntries = Object.entries(usageFreq).sort((a, b) => b[1] - a[1]); + const maxCount = usageEntries[0]?.[1] || 0; + + const usageStr = usageEntries.slice(0, 10).map(([usage, count]) => { + if (count === 1) return usage; + if (count === maxCount) return c.bold(c.yellow(`${usage} (${count})`)); + return c.cyan(`${usage} (${count})`); + }).join(', '); + + if (usageStr.length) out += c.cyan(' Usages: ') + usageStr + '\n'; + } else if (chunk.codeRelations?.usages?.length) { const usageFreq = Object.create(null); chunk.codeRelations.usages.forEach((raw) => { const trimmed = typeof raw === 'string' ? raw.trim() : ''; @@ -661,6 +360,7 @@ export function formatShortChunk({ mode, score, scoreType, + explain = false, color, queryTokens = [], rx, @@ -708,6 +408,13 @@ export function formatShortChunk({ } } + if (explain && chunk.scoreBreakdown) { + const explainLines = formatScoreBreakdown(chunk.scoreBreakdown, color); + if (explainLines.length) { + out += '\n' + explainLines.join('\n'); + } + } + out += '\n'; return out; } diff --git a/src/retrieval/output/summary.js b/src/retrieval/output/summary.js new file mode 100644 index 000000000..33b72a076 --- /dev/null +++ b/src/retrieval/output/summary.js @@ -0,0 +1,27 @@ +import path from 'node:path'; +import { readTextFileSync } from '../../shared/encoding.js'; +import { getFileTextCache, getSummaryCache } from './cache.js'; + +export function getBodySummary(rootDir, chunk, maxWords = 80) { + try { + const absPath = path.join(rootDir, chunk.file); + const cacheKey = `${absPath}:${chunk.start}:${chunk.end}:${maxWords}`; + const summaryCache = getSummaryCache(); + const fileTextCache = getFileTextCache(); + const cached = summaryCache.get(cacheKey); + if (cached != null) return cached; + let text = fileTextCache.get(absPath); + if (text == null) { + ({ text } = readTextFileSync(absPath)); + fileTextCache.set(absPath, text); + } + const chunkText = text.slice(chunk.start, chunk.end) + .replace(/\s+/g, ' ') + .trim(); + const words = chunkText.split(/\s+/).slice(0, maxWords).join(' '); + summaryCache.set(cacheKey, words); + return words; + } catch { + return '(Could not load summary)'; + } +} diff --git a/src/retrieval/pipeline.js b/src/retrieval/pipeline.js new file mode 100644 index 000000000..a2c76a12e --- /dev/null +++ b/src/retrieval/pipeline.js @@ -0,0 +1,498 @@ +import { filterChunks } from './output.js'; +import { hasActiveFilters } from './filters.js'; +import { rankBM25, rankBM25Fields, rankDenseVectors, rankMinhash } from './rankers.js'; +import { extractNgrams, tri } from '../shared/tokenize.js'; +import { rankHnswIndex } from '../shared/hnsw.js'; + +const SQLITE_IN_LIMIT = 900; + +/** + *
Create a search pipeline runner bound to a shared context. + * @param {object} context + * @returns {(idx:object, mode:'code'|'prose'|'records'|'extracted-prose', queryEmbedding:number[]|null)=>Array} + */ +export function createSearchPipeline(context) { + const { + useSqlite, + sqliteFtsRequested, + sqliteFtsNormalize, + sqliteFtsProfile, + sqliteFtsWeights, + bm25K1, + bm25B, + fieldWeights, + postingsConfig, + queryTokens, + phraseNgramSet, + phraseRange, + symbolBoost, + filters, + filtersActive, + topN, + annEnabled, + scoreBlend, + minhashMaxDocs, + vectorAnnState, + vectorAnnUsed, + hnswAnnState, + hnswAnnUsed, + buildCandidateSetSqlite, + getTokenIndexForQuery, + rankSqliteFts, + rankVectorAnnSqlite, + rrf + } = context; + const blendEnabled = scoreBlend?.enabled === true; + const blendSparseWeight = Number.isFinite(Number(scoreBlend?.sparseWeight)) + ? Number(scoreBlend.sparseWeight) + : 1; + const blendAnnWeight = Number.isFinite(Number(scoreBlend?.annWeight)) + ? Number(scoreBlend.annWeight) + : 1; + const symbolBoostEnabled = symbolBoost?.enabled !== false; + const symbolBoostDefinitionWeight = Number.isFinite(Number(symbolBoost?.definitionWeight)) + ? Number(symbolBoost.definitionWeight) + : 1.15; + const symbolBoostExportWeight = Number.isFinite(Number(symbolBoost?.exportWeight)) + ? Number(symbolBoost.exportWeight) + : 1.1; + const rrfEnabled = rrf?.enabled !== false; + const rrfK = Number.isFinite(Number(rrf?.k)) + ? Math.max(1, Number(rrf.k)) + : 60; + const minhashLimit = Number.isFinite(Number(minhashMaxDocs)) && Number(minhashMaxDocs) > 0 + ? Number(minhashMaxDocs) + : null; + const chargramMaxTokenLength = postingsConfig?.chargramMaxTokenLength == null + ? null + : Math.max(2, Math.floor(Number(postingsConfig.chargramMaxTokenLength))); + const fieldWeightsEnabled = fieldWeights + && Object.values(fieldWeights).some((value) => Number.isFinite(Number(value)) && Number(value) > 0); + + const isDefinitionKind = (kind) => typeof kind === 'string' + && /Declaration|Definition|Initializer|Deinitializer/.test(kind); + + const isExportedChunk = (chunk) => { + if (!chunk) return false; + if (chunk.exported === true || chunk?.meta?.exported === true) return true; + const kind = chunk.kind || ''; + if (typeof kind === 'string' && kind.includes('Export')) return true; + const exportsList = Array.isArray(chunk.exports) + ? chunk.exports + : (Array.isArray(chunk?.meta?.exports) ? chunk.meta.exports : null); + if (!exportsList || !chunk.name) return false; + return exportsList.includes(chunk.name); + }; + + /** + * Build a candidate set from file-backed indexes (or SQLite). 
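An example of the expansion this helper performs, assuming `extractNgrams` joins adjacent tokens with single spaces (consistent with the phrase sets used elsewhere in this diff):

```js
// Query tokens -> phrase n-grams for phraseMinN = 2, phraseMaxN = 3.
const tokens = ['open', 'file', 'handle'];
const ngrams = [];
for (let n = 2; n <= 3; n += 1) {
  for (let i = 0; i + n <= tokens.length; i += 1) {
    ngrams.push(tokens.slice(i, i + n).join(' '));
  }
}
// -> ['open file', 'file handle', 'open file handle']; every n-gram (plus each
// token's chargrams) that hits a vocab entry unions its posting list into the
// candidate set.
```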
+ * @param {object} idx + * @param {string[]} tokens + * @param {'code'|'prose'|'records'|'extracted-prose'} mode + * @returns {Set|null} + */ + function buildCandidateSet(idx, tokens, mode) { + if (useSqlite && (mode === 'code' || mode === 'prose')) { + return buildCandidateSetSqlite(mode, tokens); + } + + const candidates = new Set(); + let matched = false; + + if (postingsConfig.enablePhraseNgrams !== false && idx.phraseNgrams?.vocab && idx.phraseNgrams?.postings) { + const vocabIndex = idx.phraseNgrams.vocabIndex + || (idx.phraseNgrams.vocabIndex = new Map(idx.phraseNgrams.vocab.map((t, i) => [t, i]))); + const ngrams = extractNgrams(tokens, postingsConfig.phraseMinN, postingsConfig.phraseMaxN); + for (const ng of ngrams) { + const hit = vocabIndex.get(ng); + if (hit === undefined) continue; + const posting = idx.phraseNgrams.postings[hit] || []; + posting.forEach((id) => candidates.add(id)); + matched = matched || posting.length > 0; + } + } + + if (postingsConfig.enableChargrams !== false && idx.chargrams?.vocab && idx.chargrams?.postings) { + const vocabIndex = idx.chargrams.vocabIndex + || (idx.chargrams.vocabIndex = new Map(idx.chargrams.vocab.map((t, i) => [t, i]))); + for (const token of tokens) { + if (chargramMaxTokenLength && token.length > chargramMaxTokenLength) continue; + for (let n = postingsConfig.chargramMinN; n <= postingsConfig.chargramMaxN; n++) { + for (const gram of tri(token, n)) { + const hit = vocabIndex.get(gram); + if (hit === undefined) continue; + const posting = idx.chargrams.postings[hit] || []; + posting.forEach((id) => candidates.add(id)); + matched = matched || posting.length > 0; + } + } + } + } + + return matched ? candidates : null; + } + + function getPhraseMatchInfo(chunk, phraseSet, range) { + if (!phraseSet || !phraseSet.size || !chunk) return { matches: 0 }; + let ngrams = Array.isArray(chunk.ngrams) && chunk.ngrams.length ? chunk.ngrams : null; + if (!ngrams && Array.isArray(chunk.tokens) && range?.min && range?.max) { + ngrams = extractNgrams(chunk.tokens, range.min, range.max); + } + if (!ngrams || !ngrams.length) return { matches: 0 }; + let matches = 0; + for (const ng of ngrams) { + if (phraseSet.has(ng)) matches += 1; + } + return { matches }; + } + + /** + * Execute the full search pipeline for a mode. + * @param {object} idx + * @param {'code'|'prose'|'records'|'extracted-prose'} mode + * @param {number[]|null} queryEmbedding + * @returns {Array} + */ + return function runSearch(idx, mode, queryEmbedding) { + const meta = idx.chunkMeta; + const sqliteEnabledForMode = useSqlite && (mode === 'code' || mode === 'prose'); + const filtersEnabled = typeof filtersActive === 'boolean' + ? filtersActive + : hasActiveFilters(filters); + + // Filtering + const filteredMeta = filtersEnabled + ? filterChunks(meta, filters, idx.filterIndex, idx.fileRelations) + : meta; + const allowedIdx = filtersEnabled ? 
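Two small invariants sit at this point in `runSearch`: ranking depth is expanded to three times the requested `topN` so later fusion and boosting still have material to reorder, and an active filter set that matches nothing aborts the mode before any ranking work. Numerically:

```js
const topN = 10;
const searchTopN = Math.max(1, Number(topN) || 1);
const expandedTopN = searchTopN * 3; // rank 30 deep, return 10 after fusion/boosts

const filteredMeta = [];             // filters matched no chunks
const allowedIdx = new Set(filteredMeta.map((c) => c.id));
// allowedIdx.size === 0 -> runSearch returns [] immediately for this mode
```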
new Set(filteredMeta.map((c) => c.id)) : null; + if (filtersEnabled && (!allowedIdx || allowedIdx.size === 0)) { + return []; + } + + const intersectCandidateSet = (candidateSet, allowedSet) => { + if (!allowedSet) return candidateSet; + if (!candidateSet) return allowedSet; + const filtered = new Set(); + for (const id of candidateSet) { + if (allowedSet.has(id)) filtered.add(id); + } + return filtered; + }; + + const searchTopN = Math.max(1, Number(topN) || 1); + const expandedTopN = searchTopN * 3; + + // Main search: BM25 token match (with optional SQLite FTS first pass) + let candidates = null; + let bmHits = []; + let sparseType = fieldWeightsEnabled ? 'bm25-fielded' : 'bm25'; + let sqliteFtsUsed = false; + const sqliteFtsAllowed = allowedIdx && allowedIdx.size ? allowedIdx : null; + const sqliteFtsCanPushdown = !!(sqliteFtsAllowed && sqliteFtsAllowed.size <= SQLITE_IN_LIMIT); + const sqliteFtsEligible = sqliteEnabledForMode + && sqliteFtsRequested + && (!filtersEnabled || sqliteFtsCanPushdown); + if (sqliteFtsEligible) { + bmHits = rankSqliteFts( + idx, + queryTokens, + mode, + expandedTopN, + sqliteFtsNormalize, + sqliteFtsCanPushdown ? sqliteFtsAllowed : null + ); + sqliteFtsUsed = bmHits.length > 0; + if (sqliteFtsUsed) { + sparseType = 'fts'; + candidates = new Set(bmHits.map((h) => h.idx)); + } + } + if (!bmHits.length) { + const tokenIndexOverride = sqliteEnabledForMode ? getTokenIndexForQuery(queryTokens, mode) : null; + candidates = buildCandidateSet(idx, queryTokens, mode); + bmHits = fieldWeightsEnabled + ? rankBM25Fields({ + idx, + tokens: queryTokens, + topN: expandedTopN, + fieldWeights, + allowedIdx, + k1: bm25K1, + b: bm25B + }) + : rankBM25({ + idx, + tokens: queryTokens, + topN: expandedTopN, + tokenIndexOverride, + allowedIdx, + k1: bm25K1, + b: bm25B + }); + sparseType = fieldWeightsEnabled ? 'bm25-fielded' : 'bm25'; + sqliteFtsUsed = false; + } + + // MinHash (embedding) ANN, if requested + let annHits = []; + let annSource = null; + const annCandidates = intersectCandidateSet(candidates, allowedIdx); + const annFallback = candidates && allowedIdx ? allowedIdx : null; + const annCandidatesEmpty = annCandidates && annCandidates.size === 0; + if (annEnabled) { + if (queryEmbedding && vectorAnnState?.[mode]?.available) { + if (!annCandidatesEmpty) { + annHits = rankVectorAnnSqlite(mode, queryEmbedding, expandedTopN, annCandidates); + } + if (!annHits.length && annFallback) { + annHits = rankVectorAnnSqlite(mode, queryEmbedding, expandedTopN, annFallback); + } + if (annHits.length) { + vectorAnnUsed[mode] = true; + annSource = 'sqlite-vector'; + } + } + if (!annHits.length && queryEmbedding && (idx.hnsw?.available || hnswAnnState?.[mode]?.available)) { + if (!annCandidatesEmpty) { + annHits = rankHnswIndex(idx.hnsw || {}, queryEmbedding, expandedTopN, annCandidates); + } + if (!annHits.length && annFallback) { + annHits = rankHnswIndex(idx.hnsw || {}, queryEmbedding, expandedTopN, annFallback); + } + if (annHits.length) { + if (hnswAnnUsed && mode in hnswAnnUsed) hnswAnnUsed[mode] = true; + annSource = 'hnsw'; + } + } + if (!annHits.length && queryEmbedding && idx.denseVec?.vectors?.length) { + if (!annCandidatesEmpty) { + annHits = rankDenseVectors(idx, queryEmbedding, expandedTopN, annCandidates); + } + if (!annHits.length && annFallback) { + annHits = rankDenseVectors(idx, queryEmbedding, expandedTopN, annFallback); + } + if (annHits.length) annSource = 'dense'; + } + if (!annHits.length) { + const minhashBase = candidates || (bmHits.length ? 
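The ANN stage above is a fallback cascade: SQLite vector ANN, then HNSW, then brute-force dense cosine, and finally MinHash (the branch continuing below) as a token-based stand-in when no embedding route produced hits; each backend tries the intersected candidate set first, then the filter allow-set. A runnable outline with stand-in rankers:

```js
const backends = [
  () => [],                      // sqlite vector ANN: unavailable in this sketch
  () => [],                      // hnsw: no index on disk
  () => [{ idx: 4, sim: 0.91 }], // brute-force dense cosine succeeds
  () => [{ idx: 9, sim: 0.35 }]  // minhash is never reached
];
let annHits = [];
for (const backend of backends) {
  annHits = backend();
  if (annHits.length) break;
}
// annHits -> [{ idx: 4, sim: 0.91 }]; annSource would be 'dense'
```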
new Set(bmHits.map((h) => h.idx)) : null); + const minhashCandidates = intersectCandidateSet(minhashBase, allowedIdx); + const minhashFallback = minhashBase && allowedIdx ? allowedIdx : null; + const minhashCandidatesEmpty = minhashCandidates && minhashCandidates.size === 0; + const minhashTotal = minhashCandidates ? minhashCandidates.size : (idx.minhash?.signatures?.length || 0); + const allowMinhash = minhashTotal > 0 && (!minhashLimit || minhashTotal <= minhashLimit); + if (allowMinhash && !minhashCandidatesEmpty) { + annHits = rankMinhash(idx, queryTokens, expandedTopN, minhashCandidates); + if (annHits.length) annSource = 'minhash'; + } + if (!annHits.length && allowMinhash && minhashFallback) { + annHits = rankMinhash(idx, queryTokens, expandedTopN, minhashFallback); + if (annHits.length) annSource = 'minhash'; + } + } + } + + const useRrf = rrfEnabled && !blendEnabled && bmHits.length && annHits.length; + const sparseRanks = new Map(); + const annRanks = new Map(); + if (useRrf) { + bmHits.forEach((hit, index) => sparseRanks.set(hit.idx, index + 1)); + annHits.forEach((hit, index) => annRanks.set(hit.idx, index + 1)); + } + + if (idx.loadChunkMetaByIds) { + const idsToLoad = new Set(); + bmHits.forEach((h) => idsToLoad.add(h.idx)); + annHits.forEach((h) => idsToLoad.add(h.idx)); + const missing = Array.from(idsToLoad).filter((id) => !meta[id]); + if (missing.length) idx.loadChunkMetaByIds(mode, missing, meta); + } + + // Combine and dedup + const allHits = new Map(); + const recordHit = (idxVal, update) => { + const current = allHits.get(idxVal) || { bm25: null, fts: null, ann: null, annSource: null }; + allHits.set(idxVal, { ...current, ...update }); + }; + bmHits.forEach((h) => { + recordHit(h.idx, sparseType === 'fts' ? { fts: h.score } : { bm25: h.score }); + }); + annHits.forEach((h) => { + recordHit(h.idx, { ann: h.sim, annSource }); + }); + + const sparseMaxScore = bmHits.length + ? Math.max(...bmHits.map((hit) => (hit.score ?? hit.sim ?? 0))) + : null; + const scored = [...allHits.entries()] + .filter(([idxVal]) => !allowedIdx || allowedIdx.has(idxVal)) + .map(([idxVal, scores]) => { + const sparseScore = scores.fts ?? scores.bm25 ?? null; + const annScore = scores.ann ?? null; + const sparseTypeValue = scores.fts != null + ? 'fts' + : (scores.bm25 != null ? (fieldWeightsEnabled ? 'bm25-fielded' : 'bm25') : null); + let scoreType = null; + let score = null; + let blendInfo = null; + if (useRrf) { + const sparseRank = sparseRanks.get(idxVal) ?? null; + const annRank = annRanks.get(idxVal) ?? null; + const sparseRrf = sparseRank ? 1 / (rrfK + sparseRank) : 0; + const annRrf = annRank ? 1 / (rrfK + annRank) : 0; + scoreType = 'rrf'; + score = sparseRrf + annRrf; + blendInfo = { + k: rrfK, + sparseRank, + annRank, + sparseRrf, + annRrf, + score + }; + } else if (blendEnabled && (sparseScore != null || annScore != null)) { + const sparseMax = sparseScore != null + ? Math.max(sparseScore, sparseMaxScore || 0) + : 0; + const normalizedSparse = sparseScore != null && sparseMax > 0 + ? sparseScore / sparseMax + : null; + const clippedAnn = annScore != null + ? Math.max(-1, Math.min(1, annScore)) + : null; + const normalizedAnn = clippedAnn != null ? (clippedAnn + 1) / 2 : null; + const activeSparseWeight = normalizedSparse != null ? blendSparseWeight : 0; + const activeAnnWeight = normalizedAnn != null ? blendAnnWeight : 0; + const weightSum = activeSparseWeight + activeAnnWeight; + const blended = weightSum > 0 + ? ((normalizedSparse ?? 0) * activeSparseWeight + (normalizedAnn ?? 
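+          // Worked example with hypothetical weights 0.6 sparse / 0.4 ann:
+          // sparse 8.0 against max 10.0 → 0.8; ann cosine 0.5 → (0.5 + 1) / 2 = 0.75;
+          // blended = (0.8 * 0.6 + 0.75 * 0.4) / 1.0 = 0.78.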
0) * activeAnnWeight) / weightSum + : 0; + scoreType = 'blend'; + score = blended; + blendInfo = { + score: blended, + sparseNormalized: normalizedSparse, + annNormalized: normalizedAnn, + sparseWeight: activeSparseWeight, + annWeight: activeAnnWeight + }; + } else if (sparseScore != null) { + scoreType = sparseTypeValue; + score = sparseScore; + } else if (annScore != null) { + scoreType = 'ann'; + score = annScore; + } else { + scoreType = 'none'; + score = 0; + } + const chunk = meta[idxVal]; + if (!chunk) return null; + const fileRelations = idx.fileRelations + ? (typeof idx.fileRelations.get === 'function' + ? idx.fileRelations.get(chunk.file) + : idx.fileRelations[chunk.file]) + : null; + const enrichedChunk = fileRelations + ? { + ...chunk, + imports: fileRelations.imports || chunk.imports, + exports: fileRelations.exports || chunk.exports, + usages: fileRelations.usages || chunk.usages, + importLinks: fileRelations.importLinks || chunk.importLinks + } + : chunk; + let phraseMatches = 0; + let phraseBoost = 0; + let phraseFactor = 0; + if (phraseNgramSet && phraseRange?.min && phraseRange?.max) { + const matchInfo = getPhraseMatchInfo(chunk, phraseNgramSet, phraseRange); + phraseMatches = matchInfo.matches; + if (phraseMatches) { + phraseFactor = Math.min(0.5, phraseMatches * 0.1); + phraseBoost = score * phraseFactor; + score += phraseBoost; + } + } + let symbolBoost = 0; + let symbolFactor = 1; + let symbolInfo = null; + if (symbolBoostEnabled) { + const isDefinition = isDefinitionKind(chunk.kind); + const isExported = isExportedChunk(enrichedChunk); + let factor = 1; + if (isDefinition) factor *= symbolBoostDefinitionWeight; + if (isExported) factor *= symbolBoostExportWeight; + symbolFactor = factor; + if (factor !== 1) { + symbolBoost = score * (factor - 1); + score *= factor; + } + symbolInfo = { + definition: isDefinition, + export: isExported, + factor: symbolFactor, + boost: symbolBoost + }; + } + const scoreBreakdown = { + sparse: sparseScore != null ? { + type: sparseTypeValue, + score: sparseScore, + normalized: scores.fts != null ? sqliteFtsNormalize : null, + weights: scores.fts != null ? sqliteFtsWeights : null, + profile: scores.fts != null ? sqliteFtsProfile : null, + fielded: fieldWeightsEnabled || false, + k1: scores.bm25 != null ? bm25K1 : null, + b: scores.bm25 != null ? bm25B : null, + ftsFallback: sqliteFtsRequested ? !sqliteFtsUsed : false + } : null, + ann: annScore != null ? { + score: annScore, + source: scores.annSource || null + } : null, + rrf: useRrf ? blendInfo : null, + phrase: phraseNgramSet ? { + matches: phraseMatches, + boost: phraseBoost, + factor: phraseFactor + } : null, + symbol: symbolInfo, + blend: blendEnabled && !useRrf ? 
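+          // RRF worked example (hypothetical rrfK = 60): sparseRank 1 and
+          // annRank 3 give 1/(60 + 1) + 1/(60 + 3) ≈ 0.0323.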
blendInfo : null, + selected: { + type: scoreType, + score + } + }; + return { + idx: idxVal, + score, + scoreType, + scoreBreakdown, + chunk: enrichedChunk, + sparseScore, + sparseType: sparseTypeValue, + annScore, + annSource: scores.annSource || null + }; + }) + .filter(Boolean) + .sort((a, b) => (b.score - a.score) || (a.idx - b.idx)) + .slice(0, searchTopN); + + const ranked = scored + .map((entry) => ({ + ...entry.chunk, + score: entry.score, + scoreType: entry.scoreType, + sparseScore: entry.sparseScore, + sparseType: entry.sparseType, + annScore: entry.annScore, + annSource: entry.annSource, + annType: entry.annSource, + scoreBreakdown: entry.scoreBreakdown + })) + .filter(Boolean); + + return ranked; + }; +} diff --git a/src/search/query-cache.js b/src/retrieval/query-cache.js similarity index 100% rename from src/search/query-cache.js rename to src/retrieval/query-cache.js diff --git a/src/retrieval/query-intent.js b/src/retrieval/query-intent.js new file mode 100644 index 000000000..516238842 --- /dev/null +++ b/src/retrieval/query-intent.js @@ -0,0 +1,84 @@ +const PATH_PATTERN = /(^|[\s"'`])(\.{1,2}[\\/]|[A-Za-z]:[\\/]|~[\\/]|\/)/; +const CODE_TOKEN_PATTERN = /[{}()[\];:<>.=]|=>|->|::|\+\+|--|\|\||&&/; +const CAMEL_PATTERN = /[a-z][A-Z]/; +const SNAKE_PATTERN = /_/; + +const DEFAULT_FIELD_WEIGHTS = { + code: { name: 2.0, signature: 1.5, doc: 1.2, comment: 0.6, body: 1.0 }, + prose: { name: 1.2, signature: 0.9, doc: 2.1, comment: 1.8, body: 1.7 }, + path: { name: 2.4, signature: 1.7, doc: 0.9, comment: 0.4, body: 0.7 }, + mixed: { name: 1.8, signature: 1.3, doc: 1.6, comment: 1.2, body: 1.2 } +}; + +const detectSignals = (query, tokens) => { + const normalized = query || ''; + const words = tokens.filter((token) => /^[a-z0-9_]+$/i.test(token)); + const symbolTokens = tokens.filter((token) => /[^a-z0-9_]/i.test(token)); + const hasPath = PATH_PATTERN.test(normalized) || /[\\/]/.test(normalized); + const hasCodePunctuation = CODE_TOKEN_PATTERN.test(normalized) + || symbolTokens.length > 0; + const hasCamel = CAMEL_PATTERN.test(normalized); + const hasSnake = SNAKE_PATTERN.test(normalized); + const wordCount = words.length; + return { + hasPath, + hasCodePunctuation, + hasCamel, + hasSnake, + wordCount, + symbolCount: symbolTokens.length + }; +}; + +export const classifyQuery = ({ query, tokens = [], phrases = [], filters = {} }) => { + const signals = detectSignals(query, tokens); + const scores = { code: 0, prose: 0, path: 0 }; + + if (signals.hasPath || filters?.file || filters?.path) scores.path += 3; + if (signals.hasCodePunctuation) scores.code += 2; + if (signals.hasCamel || signals.hasSnake) scores.code += 1; + if (signals.wordCount >= 3) scores.prose += 2; + if (phrases.length >= 2) scores.prose += 1; + if (signals.symbolCount >= 2) scores.code += 1; + + const sorted = Object.entries(scores).sort((a, b) => b[1] - a[1]); + const [topType, topScore] = sorted[0]; + const secondScore = sorted[1]?.[1] ?? 0; + let type = topScore === 0 ? 'mixed' : topType; + if (topScore >= 2 && (topScore - secondScore <= 1)) { + type = 'mixed'; + } + if (scores.path >= 3 && scores.path >= scores.code && scores.path >= scores.prose) { + type = 'path'; + } + const vectorMode = type === 'prose' ? 'doc' : (type === 'code' || type === 'path' ? 'code' : null); + + return { + type, + scores, + signals, + vectorMode, + reason: type === 'mixed' ? 
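+    // Illustrative outcomes, assuming straightforward tokenization:
+    // 'getUserById(' → code (punctuation + camelCase), 'how does caching work'
+    // → prose (3+ plain words), 'src/retrieval/query.js' → path (the path
+    // override wins even when code/prose signals pull toward mixed).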
'signals mixed or weak' : `dominant ${type} signals` + }; +}; + +export const resolveIntentVectorMode = (denseVectorMode, intent) => { + if (denseVectorMode !== 'auto') return denseVectorMode; + if (intent?.vectorMode) return intent.vectorMode; + return denseVectorMode; +}; + +export const resolveIntentFieldWeights = (fieldWeightsInput, intent) => { + if (fieldWeightsInput === false) return null; + const key = intent?.type && DEFAULT_FIELD_WEIGHTS[intent.type] + ? intent.type + : 'code'; + const resolved = { ...DEFAULT_FIELD_WEIGHTS[key] }; + if (fieldWeightsInput && typeof fieldWeightsInput === 'object') { + for (const [field, value] of Object.entries(resolved)) { + const override = Number(fieldWeightsInput[field]); + if (Number.isFinite(override)) resolved[field] = override; + } + } + return resolved; +}; diff --git a/src/search/query-parse.js b/src/retrieval/query-parse.js similarity index 100% rename from src/search/query-parse.js rename to src/retrieval/query-parse.js diff --git a/src/search/query.js b/src/retrieval/query.js similarity index 80% rename from src/search/query.js rename to src/retrieval/query.js index a8502a128..6fc216bdc 100644 --- a/src/search/query.js +++ b/src/retrieval/query.js @@ -1,4 +1,10 @@ -import { extractNgrams, splitId, splitWordsWithDict } from '../shared/tokenize.js'; +import { + extractNgrams, + extractPunctuationTokens, + splitId, + splitIdPreserveCase, + splitWordsWithDict +} from '../shared/tokenize.js'; /** * Parse churn arg into a numeric threshold. @@ -78,11 +84,13 @@ export function parseQueryInput(raw) { const normalizeToken = (value) => String(value || '').normalize('NFKD'); -const expandQueryToken = (raw, dict) => { +const expandQueryToken = (raw, dict, options) => { + const caseSensitive = options?.caseSensitive === true; const normalized = normalizeToken(raw); if (!normalized) return []; + if (caseSensitive) return [normalized]; if (normalized.length <= 3 || dict.has(normalized)) return [normalized]; - const expanded = splitWordsWithDict(normalized, dict); + const expanded = splitWordsWithDict(normalized, dict, options); return expanded.length ? expanded : [normalized]; }; @@ -92,13 +100,16 @@ const expandQueryToken = (raw, dict) => { * @param {Set} dict * @returns {string[]} */ -export function tokenizeQueryTerms(rawTerms, dict) { +export function tokenizeQueryTerms(rawTerms, dict, options) { + const caseSensitive = options?.caseSensitive === true; + const splitter = caseSensitive ? splitIdPreserveCase : splitId; const tokens = []; const entries = Array.isArray(rawTerms) ? rawTerms : (rawTerms ? [rawTerms] : []); for (const entry of entries) { - const parts = splitId(String(entry || '')).map(normalizeToken).filter(Boolean); + tokens.push(...extractPunctuationTokens(entry)); + const parts = splitter(String(entry || '')).map(normalizeToken).filter(Boolean); for (const part of parts) { - tokens.push(...expandQueryToken(part, dict)); + tokens.push(...expandQueryToken(part, dict, options)); } } return tokens.filter(Boolean); @@ -110,11 +121,14 @@ export function tokenizeQueryTerms(rawTerms, dict) { * @param {Set} dict * @returns {string[]} */ -export function tokenizePhrase(phrase, dict) { - const parts = splitId(String(phrase || '')).map(normalizeToken).filter(Boolean); +export function tokenizePhrase(phrase, dict, options) { + const caseSensitive = options?.caseSensitive === true; + const splitter = caseSensitive ? 
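+  // With caseSensitive set, splitIdPreserveCase is assumed to keep identifier
+  // casing intact, and expandQueryToken returns tokens unexpanded rather than
+  // consulting the dictionary.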
splitIdPreserveCase : splitId; + const parts = splitter(String(phrase || '')).map(normalizeToken).filter(Boolean); const tokens = []; + tokens.push(...extractPunctuationTokens(phrase)); for (const part of parts) { - tokens.push(...expandQueryToken(part, dict)); + tokens.push(...expandQueryToken(part, dict, options)); } return tokens.filter(Boolean); } diff --git a/src/search/rankers.js b/src/retrieval/rankers.js similarity index 58% rename from src/search/rankers.js rename to src/retrieval/rankers.js index f12be5947..7a11d0ffc 100644 --- a/src/search/rankers.js +++ b/src/retrieval/rankers.js @@ -1,15 +1,17 @@ -import { SimpleMinHash } from '../indexer/minhash.js'; +import { SimpleMinHash } from '../index/minhash.js'; /** * Legacy BM25-like scoring using chunk metadata fields directly. * @param {object} idx * @param {string[]} tokens * @param {number} topN + * @param {Set|null} [allowedIdx] * @returns {Array<{idx:number,score:number}>} */ -export function rankBM25Legacy(idx, tokens, topN) { +export function rankBM25Legacy(idx, tokens, topN, allowedIdx = null) { + if (allowedIdx && allowedIdx.size === 0) return []; const scores = new Map(); - const ids = idx.chunkMeta.map((_, i) => i); + const ids = allowedIdx ? Array.from(allowedIdx) : idx.chunkMeta.map((_, i) => i); ids.forEach((i) => { const chunk = idx.chunkMeta[i]; if (!chunk) return; @@ -56,13 +58,25 @@ export function getTokenIndex(idx) { * @param {string[]} params.tokens * @param {number} params.topN * @param {object|null} [params.tokenIndexOverride] + * @param {Set|null} [params.allowedIdx] * @param {number} [params.k1] * @param {number} [params.b] * @returns {Array<{idx:number,score:number}>} */ -export function rankBM25({ idx, tokens, topN, tokenIndexOverride = null, k1 = 1.2, b = 0.75 }) { +export function rankBM25({ + idx, + tokens, + topN, + tokenIndexOverride = null, + allowedIdx = null, + k1 = 1.2, + b = 0.75 +}) { const tokenIndex = tokenIndexOverride || getTokenIndex(idx); - if (!tokenIndex || !tokenIndex.vocab || !tokenIndex.postings) return rankBM25Legacy(idx, tokens, topN); + if (!tokenIndex || !tokenIndex.vocab || !tokenIndex.postings) { + return rankBM25Legacy(idx, tokens, topN, allowedIdx); + } + if (allowedIdx && allowedIdx.size === 0) return []; const scores = new Map(); const docLengths = tokenIndex.docLengths; @@ -81,6 +95,7 @@ export function rankBM25({ idx, tokens, topN, tokenIndexOverride = null, k1 = 1. const idf = Math.log(1 + (totalDocs - df + 0.5) / (df + 0.5)); for (const [docId, tf] of posting) { + if (allowedIdx && !allowedIdx.has(docId)) continue; const dl = docLengths[docId] || 0; const denom = tf + k1 * (1 - b + b * (dl / avgDocLen)); const score = idf * ((tf * (k1 + 1)) / denom) * qCount; @@ -99,6 +114,79 @@ export function rankBM25({ idx, tokens, topN, tokenIndexOverride = null, k1 = 1. .slice(0, topN); } +/** + * Rank documents using BM25 across fielded postings. 
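+ * Per-field BM25 scores are scaled by their weight and summed; e.g. with
+ * fieldWeights { name: 2, body: 1 } and equal per-field statistics, a hit in
+ * `name` contributes twice what the same hit would in `body`.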
+ * @param {object} params + * @param {object} params.idx + * @param {string[]} params.tokens + * @param {number} params.topN + * @param {object} params.fieldWeights + * @param {Set|null} [params.allowedIdx] + * @param {number} [params.k1] + * @param {number} [params.b] + * @returns {Array<{idx:number,score:number}>} + */ +export function rankBM25Fields({ + idx, + tokens, + topN, + fieldWeights, + allowedIdx = null, + k1 = 1.2, + b = 0.75 +}) { + const fields = idx.fieldPostings?.fields; + if (!fields || !fieldWeights || !tokens.length) { + return rankBM25({ idx, tokens, topN, k1, b, allowedIdx }); + } + if (allowedIdx && allowedIdx.size === 0) return []; + + const qtf = new Map(); + tokens.forEach((tok) => qtf.set(tok, (qtf.get(tok) || 0) + 1)); + + const scores = new Map(); + for (const [field, weight] of Object.entries(fieldWeights)) { + const fieldWeight = Number(weight); + if (!Number.isFinite(fieldWeight) || fieldWeight <= 0) continue; + const index = fields[field]; + if (!index || !index.vocab || !index.postings) continue; + if (!index.vocabIndex) { + index.vocabIndex = new Map(index.vocab.map((t, i) => [t, i])); + } + const docLengths = Array.isArray(index.docLengths) ? index.docLengths : []; + const avgDocLen = Number.isFinite(index.avgDocLen) ? index.avgDocLen : 1; + const totalDocs = Number.isFinite(index.totalDocs) ? index.totalDocs : docLengths.length; + if (!totalDocs) continue; + + for (const [tok, qCount] of qtf.entries()) { + const tokIdx = index.vocabIndex.get(tok); + if (tokIdx === undefined) continue; + const posting = index.postings[tokIdx] || []; + const df = posting.length; + if (!df) continue; + const idf = Math.log(1 + (totalDocs - df + 0.5) / (df + 0.5)); + + for (const [docId, tf] of posting) { + if (allowedIdx && !allowedIdx.has(docId)) continue; + const dl = docLengths[docId] || 0; + const denom = tf + k1 * (1 - b + b * (dl / avgDocLen)); + const score = idf * ((tf * (k1 + 1)) / denom) * qCount * fieldWeight; + scores.set(docId, (scores.get(docId) || 0) + score); + } + } + } + + const weighted = [...scores.entries()].map(([docId, score]) => { + const weight = idx.chunkMeta[docId]?.weight || 1; + return { idx: docId, score: score * weight }; + }); + + return weighted + .filter(({ score }) => score > 0) + .sort((a, b) => (b.score - a.score) || (a.idx - b.idx)) + .slice(0, topN); +} + function minhashSigForTokens(tokens) { const mh = new SimpleMinHash(); tokens.forEach((t) => mh.update(t)); @@ -118,14 +206,20 @@ function jaccard(sigA, sigB) { * @param {number} topN * @returns {Array<{idx:number,sim:number}>} */ -export function rankMinhash(idx, tokens, topN) { +export function rankMinhash(idx, tokens, topN, candidateSet = null) { if (!idx.minhash?.signatures?.length) return []; + if (!Array.isArray(tokens) || !tokens.length) return []; const qSig = minhashSigForTokens(tokens); - const scored = idx.minhash.signatures - .map((sig, i) => ({ idx: i, sim: jaccard(qSig, sig) })) + const ids = candidateSet ? 
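+  // candidateSet, when provided, restricts Jaccard scoring to prefiltered doc
+  // ids instead of scanning every stored signature.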
Array.from(candidateSet) : idx.minhash.signatures.map((_, i) => i); + const scored = []; + for (const id of ids) { + const sig = idx.minhash.signatures[id]; + if (!sig) continue; + scored.push({ idx: id, sim: jaccard(qSig, sig) }); + } + return scored .sort((a, b) => (b.sim - a.sim) || (a.idx - b.idx)) .slice(0, topN); - return scored; } /** @@ -140,10 +234,10 @@ export function rankDenseVectors(idx, queryEmbedding, topN, candidateSet) { const vectors = idx.denseVec?.vectors; if (!queryEmbedding || !Array.isArray(vectors) || !vectors.length) return []; const dims = idx.denseVec?.dims || queryEmbedding.length; - const levels = 256; const minVal = -1; - const maxVal = 1; - const scale = (maxVal - minVal) / (levels - 1); + const scale = Number.isFinite(idx.denseVec?.scale) + ? idx.denseVec.scale + : (2 / 255); const ids = candidateSet ? Array.from(candidateSet) : vectors.map((_, i) => i); const scored = []; diff --git a/src/retrieval/sqlite-cache.js b/src/retrieval/sqlite-cache.js new file mode 100644 index 000000000..2b0c62a21 --- /dev/null +++ b/src/retrieval/sqlite-cache.js @@ -0,0 +1,56 @@ +import fsSync from 'node:fs'; + +const fileSignature = (filePath) => { + try { + const stat = fsSync.statSync(filePath); + return `${stat.size}:${stat.mtimeMs}`; + } catch { + return null; + } +}; + +export function createSqliteDbCache() { + const entries = new Map(); + + const get = (dbPath) => { + const entry = entries.get(dbPath); + if (!entry) return null; + const signature = fileSignature(dbPath); + if (!signature || signature !== entry.signature) { + try { + entry.db?.close?.(); + } catch {} + entries.delete(dbPath); + return null; + } + return entry.db || null; + }; + + const set = (dbPath, db) => { + const signature = fileSignature(dbPath); + entries.set(dbPath, { db, signature }); + }; + + const close = (dbPath) => { + const entry = entries.get(dbPath); + if (!entry) return; + try { + entry.db?.close?.(); + } catch {} + entries.delete(dbPath); + }; + + const closeAll = () => { + for (const dbPath of entries.keys()) { + close(dbPath); + } + }; + + return { + get, + set, + close, + closeAll, + size: () => entries.size + }; +} diff --git a/src/search/sqlite-helpers.js b/src/retrieval/sqlite-helpers.js similarity index 60% rename from src/search/sqlite-helpers.js rename to src/retrieval/sqlite-helpers.js index 7c87b2848..6bb673abd 100644 --- a/src/search/sqlite-helpers.js +++ b/src/retrieval/sqlite-helpers.js @@ -1,8 +1,10 @@ import { extractNgrams, tri } from '../shared/tokenize.js'; import { parseArrayField, parseJson } from './query-cache.js'; import { buildFtsBm25Expr } from './fts.js'; +import { buildFilterIndex } from './filter-index.js'; const SQLITE_IN_LIMIT = 900; +const FTS_TOKEN_SAFE = /^[\p{L}\p{N}_]+$/u; /** * Create SQLite helper functions for search. @@ -24,8 +26,12 @@ export function createSqliteHelpers(options) { vectorExtension, vectorAnnState, queryVectorAnn, - modelIdDefault + modelIdDefault, + fileChargramN } = options; + const chargramMaxTokenLength = postingsConfig?.chargramMaxTokenLength == null + ? null + : Math.max(2, Math.floor(Number(postingsConfig.chargramMaxTokenLength))); const sqliteCache = { tokenStats: new Map(), @@ -43,76 +49,134 @@ export function createSqliteHelpers(options) { return Array.from(view); } + /** + * Map a chunk row into the in-memory metadata shape. + * @param {object} row + * @returns {object} + */ + function mapChunkRow(row) { + const start = Number.isFinite(row.start) ? row.start : null; + const end = Number.isFinite(row.end) ? 
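+    // Char offsets and line numbers normalize to null when the row stores
+    // non-finite values, so callers can tell "unknown" apart from 0.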
row.end : null; + const startLine = Number.isFinite(row.startLine) ? row.startLine : null; + const endLine = Number.isFinite(row.endLine) ? row.endLine : null; + const metaV2 = row.chunk_id + ? { + chunkId: row.chunk_id, + file: row.file || null, + segment: null, + range: { + start, + end, + startLine, + endLine + }, + lang: null, + ext: row.ext || null, + kind: row.kind || null, + name: row.name || null + } + : null; + return { + id: row.id, + file: row.file, + start, + end, + startLine, + endLine, + ext: row.ext, + kind: row.kind, + name: row.name, + metaV2, + weight: typeof row.weight === 'number' ? row.weight : 1, + headline: row.headline, + preContext: parseJson(row.preContext, []), + postContext: parseJson(row.postContext, []), + tokens: parseArrayField(row.tokens), + ngrams: parseJson(row.ngrams, []), + codeRelations: parseJson(row.codeRelations, null), + docmeta: parseJson(row.docmeta, null), + stats: parseJson(row.stats, null), + complexity: parseJson(row.complexity, null), + lint: parseJson(row.lint, null), + externalDocs: parseJson(row.externalDocs, null), + last_modified: row.last_modified, + last_author: row.last_author, + churn: row.churn, + chunk_authors: parseJson(row.chunk_authors, null) + }; + } + + /** + * Fill an array of chunk metadata with rows. + * @param {Array} rows + * @param {Array} target + */ + function hydrateChunkMeta(rows, target) { + for (const row of rows) { + target[row.id] = mapChunkRow(row); + } + } + /** * Load index artifacts from SQLite into in-memory structures. * @param {'code'|'prose'} mode * @returns {object} */ - function loadIndexFromSqlite(mode) { + function loadIndexFromSqlite(mode, options = {}) { const db = getDb(mode); if (!db) throw new Error('SQLite backend requested but database is not available.'); - const chunkRows = db.prepare('SELECT * FROM chunks WHERE mode = ? ORDER BY id').all(mode); + const includeMinhash = options.includeMinhash !== false; + const includeDense = options.includeDense !== false; + const includeChunks = options.includeChunks !== false; + const includeFilterIndex = options.includeFilterIndex !== false; let maxLocalId = -1; - for (const row of chunkRows) { - if (row.id > maxLocalId) maxLocalId = row.id; - } - - const chunkMeta = maxLocalId >= 0 ? Array.from({ length: maxLocalId + 1 }) : []; - for (const row of chunkRows) { - chunkMeta[row.id] = { - id: row.id, - file: row.file, - start: row.start, - end: row.end, - startLine: row.startLine, - endLine: row.endLine, - ext: row.ext, - kind: row.kind, - name: row.name, - weight: typeof row.weight === 'number' ? row.weight : 1, - headline: row.headline, - preContext: parseJson(row.preContext, []), - postContext: parseJson(row.postContext, []), - tokens: parseArrayField(row.tokens), - ngrams: parseJson(row.ngrams, []), - codeRelations: parseJson(row.codeRelations, null), - docmeta: parseJson(row.docmeta, null), - stats: parseJson(row.stats, null), - complexity: parseJson(row.complexity, null), - lint: parseJson(row.lint, null), - externalDocs: parseJson(row.externalDocs, null), - last_modified: row.last_modified, - last_author: row.last_author, - churn: row.churn, - chunk_authors: parseJson(row.chunk_authors, null) - }; + let chunkMeta = []; + if (includeChunks) { + const chunkRows = db.prepare('SELECT * FROM chunks WHERE mode = ? ORDER BY id').all(mode); + for (const row of chunkRows) { + if (row.id > maxLocalId) maxLocalId = row.id; + } + chunkMeta = maxLocalId >= 0 ? 
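+      // chunkMeta is a sparse array indexed by chunk id, sized to the highest
+      // id seen so hydrateChunkMeta() can assign rows at their ids.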
Array.from({ length: maxLocalId + 1 }) : []; + hydrateChunkMeta(chunkRows, chunkMeta); + } else { + const maxRow = db.prepare('SELECT MAX(id) as maxId FROM chunks WHERE mode = ?').get(mode); + maxLocalId = Number.isFinite(maxRow?.maxId) ? maxRow.maxId : -1; + chunkMeta = maxLocalId >= 0 ? Array.from({ length: maxLocalId + 1 }) : []; } - const signatures = Array.from({ length: chunkMeta.length }); - const sigStmt = db.prepare('SELECT doc_id, sig FROM minhash_signatures WHERE mode = ? ORDER BY doc_id'); - for (const row of sigStmt.iterate(mode)) { - signatures[row.doc_id] = unpackUint32(row.sig); + let minhash = null; + if (includeMinhash) { + const signatures = Array.from({ length: chunkMeta.length }); + const sigStmt = db.prepare('SELECT doc_id, sig FROM minhash_signatures WHERE mode = ? ORDER BY doc_id'); + for (const row of sigStmt.iterate(mode)) { + signatures[row.doc_id] = unpackUint32(row.sig); + } + minhash = signatures.length ? { signatures } : null; } - const minhash = signatures.length ? { signatures } : null; - const denseMeta = db.prepare('SELECT dims, scale, model FROM dense_meta WHERE mode = ?').get(mode) || {}; - const vectors = Array.from({ length: chunkMeta.length }); - const denseStmt = db.prepare('SELECT doc_id, vector FROM dense_vectors WHERE mode = ? ORDER BY doc_id'); - for (const row of denseStmt.iterate(mode)) { - vectors[row.doc_id] = row.vector; + let denseVec = null; + if (includeDense) { + const denseMeta = db.prepare('SELECT dims, scale, model FROM dense_meta WHERE mode = ?').get(mode) || {}; + const vectors = Array.from({ length: chunkMeta.length }); + const denseStmt = db.prepare('SELECT doc_id, vector FROM dense_vectors WHERE mode = ? ORDER BY doc_id'); + for (const row of denseStmt.iterate(mode)) { + vectors[row.doc_id] = row.vector; + } + const fallbackVec = vectors.find((vec) => vec && vec.length); + denseVec = vectors.length ? { + model: denseMeta.model || modelIdDefault, + dims: denseMeta.dims || (fallbackVec ? fallbackVec.length : 0), + scale: typeof denseMeta.scale === 'number' ? denseMeta.scale : 1.0, + vectors + } : null; } - const fallbackVec = vectors.find((vec) => vec && vec.length); - const denseVec = vectors.length ? { - model: denseMeta.model || modelIdDefault, - dims: denseMeta.dims || (fallbackVec ? fallbackVec.length : 0), - scale: typeof denseMeta.scale === 'number' ? denseMeta.scale : 1.0, - vectors - } : null; return { chunkMeta, denseVec, - minhash + minhash, + filterIndex: includeFilterIndex ? buildFilterIndex(chunkMeta, { fileChargramN }) : null, + loadChunkMetaByIds }; } @@ -130,6 +194,30 @@ export function createSqliteHelpers(options) { return chunks; } + /** + * Load chunk metadata rows for a list of ids. + * @param {'code'|'prose'} mode + * @param {number[]} ids + * @param {Array|null} target + * @returns {Array} + */ + function loadChunkMetaByIds(mode, ids, target = null) { + const db = getDb(mode); + if (!db || !ids || !ids.length) return target || []; + const unique = Array.from(new Set(ids.filter((id) => Number.isFinite(id)))); + if (!unique.length) return target || []; + const out = target || []; + for (const chunk of chunkArray(unique)) { + const placeholders = chunk.map(() => '?').join(','); + const stmt = db.prepare( + `SELECT * FROM chunks WHERE mode = ? AND id IN (${placeholders})` + ); + const rows = stmt.all(mode, ...chunk); + hydrateChunkMeta(rows, out); + } + return out; + } + /** * Fetch vocabulary rows for a list of values. 
* @param {'code'|'prose'} mode @@ -312,6 +400,7 @@ export function createSqliteHelpers(options) { if (postingsConfig.enableChargrams !== false) { const gramSet = new Set(); for (const token of tokens) { + if (chargramMaxTokenLength && token.length > chargramMaxTokenLength) continue; for (let n = postingsConfig.chargramMinN; n <= postingsConfig.chargramMaxN; n++) { for (const gram of tri(token, n)) { gramSet.add(gram); @@ -340,16 +429,33 @@ export function createSqliteHelpers(options) { * @param {'code'|'prose'} mode * @param {number} topN * @param {boolean} [normalizeScores] + * @param {Set|null} [allowedIds] * @returns {Array<{idx:number,score:number}>} */ - function rankSqliteFts(idx, queryTokens, mode, topN, normalizeScores = false) { + function rankSqliteFts(idx, queryTokens, mode, topN, normalizeScores = false, allowedIds = null) { const db = getDb(mode); if (!db || !queryTokens.length) return []; - const ftsQuery = queryTokens.join(' '); + if (allowedIds && allowedIds.size === 0) return []; + const ftsTokens = queryTokens.filter((token) => FTS_TOKEN_SAFE.test(token)); + if (!ftsTokens.length) return []; + const ftsQuery = ftsTokens.join(' '); const bm25Expr = buildFtsBm25Expr(sqliteFtsWeights); + const allowedList = allowedIds && allowedIds.size ? Array.from(allowedIds) : null; + const canPushdown = !!(allowedList && allowedList.length <= SQLITE_IN_LIMIT); + const allowedClause = canPushdown + ? ` AND chunks_fts.rowid IN (${allowedList.map(() => '?').join(',')})` + : ''; + const params = canPushdown + ? [ftsQuery, mode, ...allowedList, topN] + : [ftsQuery, mode, topN]; const rows = db.prepare( - `SELECT rowid AS id, ${bm25Expr} AS score FROM chunks_fts WHERE chunks_fts MATCH ? AND mode = ? ORDER BY score ASC, rowid ASC LIMIT ?` - ).all(ftsQuery, mode, topN); + `SELECT chunks_fts.rowid AS id, ${bm25Expr} AS score, chunks.weight AS weight + FROM chunks_fts + JOIN chunks ON chunks.id = chunks_fts.rowid + WHERE chunks_fts MATCH ? AND chunks.mode = ? + ${allowedClause} + ORDER BY score ASC, chunks_fts.rowid ASC LIMIT ?` + ).all(...params); const rawScores = rows.map((row) => -row.score); let min = 0; let max = 0; @@ -360,15 +466,23 @@ export function createSqliteHelpers(options) { const hits = []; for (let i = 0; i < rows.length; i++) { const row = rows[i]; - if (row.id < 0 || row.id >= idx.chunkMeta.length) continue; - const weight = idx.chunkMeta[row.id]?.weight || 1; + if (row.id == null || row.id < 0) continue; + const weight = typeof row.weight === 'number' + ? row.weight + : (idx.chunkMeta?.[row.id]?.weight || 1); const raw = rawScores[i]; const normalized = normalizeScores ? (max > min ? 
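+      // FTS5 bm25() is lower-is-better; scores were negated into rawScores
+      // above, so min-max scaling maps them onto 0..1 with the best hit at 1.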
(raw - min) / (max - min) : 1) : raw; hits.push({ idx: row.id, score: normalized * weight }); } - return hits; + let filteredHits = hits; + if (allowedIds && allowedIds.size && !canPushdown) { + filteredHits = filteredHits.filter((hit) => allowedIds.has(hit.idx)); + } + return filteredHits + .sort((a, b) => (b.score - a.score) || (a.idx - b.idx)) + .slice(0, topN); } /** diff --git a/src/search/cli.js b/src/search/cli.js deleted file mode 100644 index 5046970b8..000000000 --- a/src/search/cli.js +++ /dev/null @@ -1,976 +0,0 @@ -/** - * Ultra-Complete Search Utility for Rich Semantic Index (Pretty Output) - * By: ChatGPT & Nick, 2025 - * [--calls function] Filter for call relationships (calls to/from function) - * [--uses ident] Filter for usage of identifier - */ - -import fs from 'node:fs/promises'; -import fsSync from 'node:fs'; -import path from 'node:path'; -import crypto from 'node:crypto'; -import minimist from 'minimist'; -import { DEFAULT_MODEL_ID, getDictionaryPaths, getDictConfig, getIndexDir, getMetricsDir, getModelConfig, loadUserConfig, resolveRepoRoot, resolveSqlitePaths } from '../../tools/dict-utils.js'; -import { getVectorExtensionConfig, hasVectorTable, loadVectorExtension, queryVectorAnn, resolveVectorExtensionPath } from '../../tools/vector-extension.js'; -import { resolveFtsWeights } from './fts.js'; -import { getQueryEmbedding } from './embedding.js'; -import { loadQueryCache, parseJson, pruneQueryCache } from './query-cache.js'; -import { normalizeExtFilter, parseMetaFilters } from './filters.js'; -import { formatFullChunk, formatShortChunk } from './output.js'; -import { parseChurnArg, parseModifiedArgs, parseQueryInput, tokenizePhrase, tokenizeQueryTerms, buildPhraseNgrams } from './query-parse.js'; -import { normalizePostingsConfig } from '../shared/postings-config.js'; -import { createSqliteHelpers } from './sqlite-helpers.js'; -import { createSearchPipeline } from './pipeline.js'; - -const argv = minimist(process.argv.slice(2), { - boolean: ['json', 'json-compact', 'human', 'stats', 'ann', 'headline', 'lint', 'matched', 'async', 'generator', 'returns'], - alias: { n: 'top', c: 'context', t: 'type' }, - default: { n: 5, context: 3 }, - string: [ - 'calls', - 'uses', - 'signature', - 'param', - 'decorator', - 'inferred-type', - 'return-type', - 'throws', - 'reads', - 'writes', - 'mutates', - 'churn', - 'alias', - 'awaits', - 'branches', - 'loops', - 'breaks', - 'continues', - 'risk', - 'risk-tag', - 'risk-source', - 'risk-sink', - 'risk-category', - 'risk-flow', - 'meta', - 'meta-json', - 'file', - 'ext', - 'chunk-author', - 'modified-after', - 'modified-since', - 'visibility', - 'extends', - 'mode', - 'backend', - 'path', - 'model', - 'repo', - 'fts-profile', - 'fts-weights', - 'bm25-k1', - 'bm25-b' - ], -}); -const t0 = Date.now(); -const rootArg = argv.repo ? path.resolve(argv.repo) : null; -const ROOT = rootArg || resolveRepoRoot(process.cwd()); -const userConfig = loadUserConfig(ROOT); -const modelConfig = getModelConfig(ROOT, userConfig); -const modelIdDefault = argv.model || modelConfig.id || DEFAULT_MODEL_ID; -const sqliteConfig = userConfig.sqlite || {}; -const postingsConfig = normalizePostingsConfig(userConfig.indexing?.postings || {}); -const vectorExtension = getVectorExtensionConfig(ROOT, userConfig); -const bm25Config = userConfig.search?.bm25 || {}; -const bm25K1 = Number.isFinite(Number(argv['bm25-k1'])) - ? Number(argv['bm25-k1']) - : (Number.isFinite(Number(bm25Config.k1)) ? 
Number(bm25Config.k1) : 1.2); -const bm25B = Number.isFinite(Number(argv['bm25-b'])) - ? Number(argv['bm25-b']) - : (Number.isFinite(Number(bm25Config.b)) ? Number(bm25Config.b) : 0.75); -const sqliteFtsNormalize = userConfig.search?.sqliteFtsNormalize === true; -const sqliteFtsProfile = (argv['fts-profile'] || process.env.PAIROFCLEATS_FTS_PROFILE || userConfig.search?.sqliteFtsProfile || 'balanced').toLowerCase(); -let sqliteFtsWeightsConfig = userConfig.search?.sqliteFtsWeights || null; -if (argv['fts-weights']) { - const parsed = parseJson(argv['fts-weights'], null); - if (parsed) { - sqliteFtsWeightsConfig = parsed; - } else { - const values = String(argv['fts-weights']) - .split(/[,\s]+/) - .filter(Boolean) - .map((val) => Number(val)) - .filter((val) => Number.isFinite(val)); - sqliteFtsWeightsConfig = values.length ? values : sqliteFtsWeightsConfig; - } -} -const metricsDir = getMetricsDir(ROOT, userConfig); -const useStubEmbeddings = process.env.PAIROFCLEATS_EMBEDDINGS === 'stub'; -const rawArgs = process.argv.slice(2); -const query = argv._.join(' ').trim(); -if (!query) { - console.error('usage: search "query" [--repo path|--json|--json-compact|--human|--stats|--no-ann|--context N|--type T|--backend memory|sqlite|sqlite-fts|...]|--mode code|prose|both|records|all|--meta key=value|--meta-json {...}|--path path|--file path|--ext .ext|--churn [min]|--modified-after date|--modified-since days|--chunk-author name|--signature|--param|--decorator|--inferred-type|--return-type|--throws|--reads|--writes|--mutates|--alias|--awaits|--branches|--loops|--breaks|--continues|--risk|--risk-tag|--risk-source|--risk-sink|--risk-category|--risk-flow|--extends|--visibility|--async|--generator|--returns'); - process.exit(1); -} -const contextLines = Math.max(0, parseInt(argv.context, 10) || 0); -const searchType = argv.type || null; -const searchAuthor = argv.author || null; -const searchImport = argv.import || null; -const chunkAuthorFilter = argv['chunk-author'] || null; -const searchMode = String(argv.mode || 'both').toLowerCase(); -const allowedModes = new Set(['code', 'prose', 'both', 'records', 'all']); -if (!allowedModes.has(searchMode)) { - console.error(`Invalid --mode ${searchMode}. Use code|prose|both|records|all.`); - process.exit(1); -} -const runCode = searchMode === 'code' || searchMode === 'both' || searchMode === 'all'; -const runProse = searchMode === 'prose' || searchMode === 'both' || searchMode === 'all'; -const runRecords = searchMode === 'records' || searchMode === 'all'; -const branchesMin = Number.isFinite(Number(argv.branches)) ? Number(argv.branches) : null; -const loopsMin = Number.isFinite(Number(argv.loops)) ? Number(argv.loops) : null; -const breaksMin = Number.isFinite(Number(argv.breaks)) ? Number(argv.breaks) : null; -const continuesMin = Number.isFinite(Number(argv.continues)) ? Number(argv.continues) : null; -let churnMin = null; -try { - churnMin = parseChurnArg(argv.churn); -} catch (err) { - console.error(err.message); - process.exit(1); -} -let modifiedArgs; -try { - modifiedArgs = parseModifiedArgs(argv['modified-after'], argv['modified-since']); -} catch (err) { - console.error(err.message); - process.exit(1); -} -const modifiedAfter = modifiedArgs.modifiedAfter; -const modifiedSinceDays = modifiedArgs.modifiedSinceDays; -const fileFilters = []; -if (argv.path) fileFilters.push(argv.path); -if (argv.file) fileFilters.push(argv.file); -const fileFilter = fileFilters.length ? 
fileFilters.flat() : null; -const extFilter = normalizeExtFilter(argv.ext); -const metaFilters = parseMetaFilters(argv.meta, argv['meta-json']); -const sqlitePaths = resolveSqlitePaths(ROOT, userConfig); -const sqliteCodePath = sqlitePaths.codePath; -const sqliteProsePath = sqlitePaths.prosePath; -const needsCode = runCode; -const needsProse = runProse; -const backendArg = typeof argv.backend === 'string' ? argv.backend.toLowerCase() : ''; -const sqliteScoreModeConfig = sqliteConfig.scoreMode === 'fts'; -const sqliteFtsRequested = backendArg === 'sqlite-fts' || backendArg === 'fts' || (!backendArg && sqliteScoreModeConfig); -const backendForcedSqlite = backendArg === 'sqlite' || sqliteFtsRequested; -const backendDisabled = backendArg && !(backendArg === 'sqlite' || sqliteFtsRequested); -const sqliteConfigured = sqliteConfig.use === true; -const sqliteCodeAvailable = fsSync.existsSync(sqliteCodePath); -const sqliteProseAvailable = fsSync.existsSync(sqliteProsePath); -const sqliteAvailable = (!needsCode || sqliteCodeAvailable) && (!needsProse || sqliteProseAvailable); -const annFlagPresent = rawArgs.includes('--ann') || rawArgs.includes('--no-ann'); -const annDefault = userConfig.search?.annDefault !== false; -const annEnabled = annFlagPresent ? argv.ann : annDefault; -const vectorAnnEnabled = annEnabled && vectorExtension.enabled; -const queryCacheConfig = userConfig.search?.queryCache || {}; -const queryCacheEnabled = queryCacheConfig.enabled === true; -const queryCacheMaxEntries = Number.isFinite(Number(queryCacheConfig.maxEntries)) - ? Math.max(1, Number(queryCacheConfig.maxEntries)) - : 200; -const queryCacheTtlMs = Number.isFinite(Number(queryCacheConfig.ttlMs)) - ? Math.max(0, Number(queryCacheConfig.ttlMs)) - : 0; -const queryCachePath = path.join(metricsDir, 'queryCache.json'); -const jsonCompact = argv['json-compact'] === true; -const jsonOutput = argv.json || jsonCompact; - -const sqliteFtsWeights = resolveFtsWeights(sqliteFtsProfile, sqliteFtsWeightsConfig); - -if (backendForcedSqlite && !sqliteAvailable) { - const missing = []; - if (needsCode && !sqliteCodeAvailable) missing.push(`code=${sqliteCodePath}`); - if (needsProse && !sqliteProseAvailable) missing.push(`prose=${sqliteProsePath}`); - const suffix = missing.length ? missing.join(', ') : 'missing sqlite index'; - console.error(`SQLite backend requested but index not found (${suffix}).`); - process.exit(1); -} - -const needsSqlite = runCode || runProse; -if (!needsSqlite && backendForcedSqlite) { - console.warn('SQLite backend requested, but records-only mode selected; using file-backed records index.'); -} -let useSqlite = needsSqlite && (backendForcedSqlite || (!backendDisabled && sqliteConfigured)) && sqliteAvailable; -let dbCode = null; -let dbProse = null; -const vectorAnnState = { - code: { available: false }, - prose: { available: false }, - records: { available: false } -}; -const vectorAnnUsed = { code: false, prose: false, records: false }; -let vectorAnnWarned = false; -if (useSqlite) { - let Database; - try { - ({ default: Database } = await import('better-sqlite3')); - } catch (err) { - console.error('better-sqlite3 is required for the SQLite backend. Run npm install first.'); - process.exit(1); - } - - const requiredTables = sqliteFtsRequested - ? 
[ - 'chunks', - 'chunks_fts', - 'minhash_signatures', - 'dense_vectors', - 'dense_meta' - ] - : [ - 'chunks', - 'token_vocab', - 'token_postings', - 'doc_lengths', - 'token_stats', - 'phrase_vocab', - 'phrase_postings', - 'chargram_vocab', - 'chargram_postings', - 'minhash_signatures', - 'dense_vectors', - 'dense_meta' - ]; - - const openSqlite = (dbPath, label) => { - const db = new Database(dbPath, { readonly: true }); - const tableRows = db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all(); - const tableNames = new Set(tableRows.map((row) => row.name)); - const missing = requiredTables.filter((name) => !tableNames.has(name)); - if (missing.length) { - const message = `SQLite index ${label} is missing required tables (${missing.join(', ')}). Rebuild with npm run build-sqlite-index.`; - if (backendForcedSqlite) { - console.error(message); - process.exit(1); - } - console.warn(`${message} Falling back to file-backed indexes.`); - db.close(); - return null; - } - return db; - }; - - const initVectorAnn = (db, mode) => { - if (!vectorAnnEnabled || !db) return; - const loadResult = loadVectorExtension(db, vectorExtension, `sqlite ${mode}`); - if (!loadResult.ok) { - if (!vectorAnnWarned) { - const extPath = resolveVectorExtensionPath(vectorExtension); - console.warn(`[ann] SQLite vector extension unavailable (${loadResult.reason}).`); - console.warn(`[ann] Expected extension at ${extPath || 'unset'}; falling back to JS ANN.`); - vectorAnnWarned = true; - } - return; - } - if (!hasVectorTable(db, vectorExtension.table)) { - if (!vectorAnnWarned) { - console.warn(`[ann] SQLite vector table missing (${vectorExtension.table}). Rebuild with npm run build-sqlite-index.`); - vectorAnnWarned = true; - } - return; - } - vectorAnnState[mode].available = true; - }; - - if (needsCode) dbCode = openSqlite(sqliteCodePath, 'code'); - if (needsProse) dbProse = openSqlite(sqliteProsePath, 'prose'); - if (needsCode) initVectorAnn(dbCode, 'code'); - if (needsProse) initVectorAnn(dbProse, 'prose'); - if ((needsCode && !dbCode) || (needsProse && !dbProse)) { - if (dbCode) dbCode.close(); - if (dbProse) dbProse.close(); - dbCode = null; - dbProse = null; - useSqlite = false; - } -} - -const backendLabel = useSqlite - ? (sqliteFtsRequested ? 'sqlite-fts' : 'sqlite') - : 'memory'; -let modelIdForCode = null; -let modelIdForProse = null; -let modelIdForRecords = null; - -/** - * Return the active SQLite connection for a mode. 
- * @param {'code'|'prose'} mode - * @returns {import('better-sqlite3').Database|null} - */ -function getSqliteDb(mode) { - if (!useSqlite) return null; - if (mode === 'code') return dbCode; - if (mode === 'prose') return dbProse; - return null; -} - -const sqliteHelpers = createSqliteHelpers({ - getDb: getSqliteDb, - postingsConfig, - sqliteFtsWeights, - vectorExtension, - vectorAnnState, - queryVectorAnn, - modelIdDefault -}); -const { - loadIndexFromSqlite, - buildCandidateSetSqlite, - getTokenIndexForQuery, - rankSqliteFts, - rankVectorAnnSqlite -} = sqliteHelpers; - - -const dictConfig = getDictConfig(ROOT, userConfig); -const dictionaryPaths = await getDictionaryPaths(ROOT, dictConfig); -const dict = new Set(); -for (const dictFile of dictionaryPaths) { - try { - const contents = fsSync.readFileSync(dictFile, 'utf8'); - contents - .split(/\r?\n/) - .map((w) => w.trim().toLowerCase()) - .filter(Boolean) - .forEach((w) => dict.add(w)); - } catch {} -} - -const color = { - green: (t) => `\x1b[32m${t}\x1b[0m`, - yellow: (t) => `\x1b[33m${t}\x1b[0m`, - red: (t) => `\x1b[31m${t}\x1b[0m`, - cyan: (t) => `\x1b[36m${t}\x1b[0m`, - magenta: (t) => `\x1b[35m${t}\x1b[0m`, - blue: (t) => `\x1b[34m${t}\x1b[0m`, - gray: (t) => `\x1b[90m${t}\x1b[0m`, - bold: (t) => `\x1b[1m${t}\x1b[0m`, - underline: (t) => `\x1b[4m${t}\x1b[0m` -}; - -// --- LOAD INDEX --- -/** - * Load file-backed index artifacts from a directory. - * @param {string} dir - * @returns {object} - */ -function loadIndex(dir) { - const readJson = (name) => JSON.parse(fsSync.readFileSync(path.join(dir, name), 'utf8')); - const loadOptional = (name) => { - try { - return readJson(name); - } catch { - return null; - } - }; - const chunkMeta = readJson('chunk_meta.json'); - const denseVec = loadOptional('dense_vectors_uint8.json'); - if (denseVec && !denseVec.model) denseVec.model = modelIdDefault; - const idx = { - chunkMeta, - denseVec, - minhash: loadOptional('minhash_signatures.json'), - phraseNgrams: loadOptional('phrase_ngrams.json'), - chargrams: loadOptional('chargram_postings.json') - }; - try { - idx.tokenIndex = readJson('token_postings.json'); - } catch {} - return idx; -} -/** - * Resolve the index directory (cache-first, local fallback). - * @param {'code'|'prose'|'records'} mode - * @returns {string} - */ -function resolveIndexDir(mode) { - const cached = getIndexDir(ROOT, mode, userConfig); - const cachedMeta = path.join(cached, 'chunk_meta.json'); - if (fsSync.existsSync(cachedMeta)) return cached; - const local = path.join(ROOT, `index-${mode}`); - const localMeta = path.join(local, 'chunk_meta.json'); - if (fsSync.existsSync(localMeta)) return local; - return cached; -} - -/** - * Build a size/mtime signature for a file. - * @param {string} filePath - * @returns {string|null} - */ -function fileSignature(filePath) { - try { - const stat = fsSync.statSync(filePath); - return `${stat.size}:${stat.mtimeMs}`; - } catch { - return null; - } -} - -/** - * Build a signature payload for cache invalidation. - * @returns {object} - */ -function getIndexSignature() { - if (useSqlite) { - const recordDir = runRecords ? resolveIndexDir('records') : null; - const recordMeta = recordDir ? path.join(recordDir, 'chunk_meta.json') : null; - const recordDense = recordDir ? path.join(recordDir, 'dense_vectors_uint8.json') : null; - return { - backend: backendLabel, - code: fileSignature(sqliteCodePath), - prose: fileSignature(sqliteProsePath), - records: recordMeta ? fileSignature(recordMeta) : null, - recordsDense: recordDense ? 
fileSignature(recordDense) : null - }; - } - const codeDir = resolveIndexDir('code'); - const proseDir = resolveIndexDir('prose'); - const codeMeta = path.join(codeDir, 'chunk_meta.json'); - const proseMeta = path.join(proseDir, 'chunk_meta.json'); - const codeDense = path.join(codeDir, 'dense_vectors_uint8.json'); - const proseDense = path.join(proseDir, 'dense_vectors_uint8.json'); - const recordDir = runRecords ? resolveIndexDir('records') : null; - const recordMeta = recordDir ? path.join(recordDir, 'chunk_meta.json') : null; - const recordDense = recordDir ? path.join(recordDir, 'dense_vectors_uint8.json') : null; - return { - backend: backendLabel, - code: fileSignature(codeMeta), - prose: fileSignature(proseMeta), - codeDense: fileSignature(codeDense), - proseDense: fileSignature(proseDense), - records: recordMeta ? fileSignature(recordMeta) : null, - recordsDense: recordDense ? fileSignature(recordDense) : null - }; -} - -/** - * Build a deterministic cache key for the current query + settings. - * @returns {{key:string,payload:object}} - */ -function buildQueryCacheKey() { - const payload = { - query, - backend: backendLabel, - mode: searchMode, - topN: argv.n, - ann: annEnabled, - annMode: vectorExtension.annMode, - annProvider: vectorExtension.provider, - annExtension: vectorAnnEnabled, - sqliteFtsNormalize, - sqliteFtsProfile, - sqliteFtsWeights, - models: { - code: modelIdForCode, - prose: modelIdForProse, - records: modelIdForRecords - }, - filters: { - type: searchType, - author: searchAuthor, - calls: argv.calls || null, - uses: argv.uses || null, - signature: argv.signature || null, - param: argv.param || null, - import: searchImport, - lint: argv.lint || false, - churn: churnMin, - decorator: argv.decorator || null, - inferredType: argv['inferred-type'] || null, - returnType: argv['return-type'] || null, - throws: argv.throws || null, - reads: argv.reads || null, - writes: argv.writes || null, - mutates: argv.mutates || null, - risk: argv.risk || null, - riskTag: argv['risk-tag'] || null, - riskSource: argv['risk-source'] || null, - riskSink: argv['risk-sink'] || null, - riskCategory: argv['risk-category'] || null, - riskFlow: argv['risk-flow'] || null, - awaits: argv.awaits || null, - visibility: argv.visibility || null, - extends: argv.extends || null, - async: argv.async || false, - generator: argv.generator || false, - returns: argv.returns || false, - file: fileFilter || null, - ext: extFilter || null, - meta: metaFilters, - chunkAuthor: chunkAuthorFilter || null, - modifiedAfter, - modifiedSinceDays - } - }; - const raw = JSON.stringify(payload); - const key = crypto.createHash('sha1').update(raw).digest('hex'); - return { key, payload }; -} - - -const idxProse = runProse - ? (useSqlite ? loadIndexFromSqlite('prose') : loadIndex(resolveIndexDir('prose'))) - : { chunkMeta: [], denseVec: null, minhash: null }; -const idxCode = runCode - ? (useSqlite ? loadIndexFromSqlite('code') : loadIndex(resolveIndexDir('code'))) - : { chunkMeta: [], denseVec: null, minhash: null }; -const idxRecords = runRecords - ? loadIndex(resolveIndexDir('records')) - : { chunkMeta: [], denseVec: null, minhash: null }; -modelIdForCode = runCode ? (idxCode?.denseVec?.model || modelIdDefault) : null; -modelIdForProse = runProse ? (idxProse?.denseVec?.model || modelIdDefault) : null; -modelIdForRecords = runRecords ? 
(idxRecords?.denseVec?.model || modelIdDefault) : null; - -// --- QUERY TOKENIZATION --- -const parsedQuery = parseQueryInput(query); -const includeTokens = tokenizeQueryTerms(parsedQuery.includeTerms, dict); -const phraseTokens = parsedQuery.phrases - .map((phrase) => tokenizePhrase(phrase, dict)) - .filter((tokens) => tokens.length); -const phraseInfo = buildPhraseNgrams(phraseTokens, postingsConfig); -const phraseNgrams = phraseInfo.ngrams; -const phraseNgramSet = phraseNgrams.length ? new Set(phraseNgrams) : null; -const phraseRange = { min: phraseInfo.minLen, max: phraseInfo.maxLen }; -const excludeTokens = tokenizeQueryTerms(parsedQuery.excludeTerms, dict); -const excludePhraseTokens = parsedQuery.excludePhrases - .map((phrase) => tokenizePhrase(phrase, dict)) - .filter((tokens) => tokens.length); -const excludePhraseInfo = buildPhraseNgrams(excludePhraseTokens, postingsConfig); -const excludePhraseNgrams = excludePhraseInfo.ngrams; -const excludePhraseRange = excludePhraseInfo.minLen && excludePhraseInfo.maxLen - ? { min: excludePhraseInfo.minLen, max: excludePhraseInfo.maxLen } - : null; -const queryTokens = [...includeTokens, ...phraseTokens.flat()]; -const rx = queryTokens.length ? new RegExp(`(${queryTokens.join('|')})`, 'ig') : null; -const embeddingQueryText = [...parsedQuery.includeTerms, ...parsedQuery.phrases] - .join(' ') - .trim() || query; -const filters = { - type: searchType, - author: searchAuthor, - importName: searchImport, - lint: argv.lint, - churn: churnMin, - calls: argv.calls, - uses: argv.uses, - signature: argv.signature, - param: argv.param, - decorator: argv.decorator, - inferredType: argv['inferred-type'], - returnType: argv['return-type'], - throws: argv.throws, - reads: argv.reads, - writes: argv.writes, - mutates: argv.mutates, - alias: argv.alias, - risk: argv.risk, - riskTag: argv['risk-tag'], - riskSource: argv['risk-source'], - riskSink: argv['risk-sink'], - riskCategory: argv['risk-category'], - riskFlow: argv['risk-flow'], - awaits: argv.awaits, - branches: branchesMin, - loops: loopsMin, - breaks: breaksMin, - continues: continuesMin, - visibility: argv.visibility, - extends: argv.extends, - async: argv.async, - generator: argv.generator, - returns: argv.returns, - file: fileFilter, - ext: extFilter, - meta: metaFilters, - chunkAuthor: chunkAuthorFilter, - modifiedAfter, - excludeTokens, - excludePhrases: excludePhraseNgrams, - excludePhraseRange -}; -const searchPipeline = createSearchPipeline({ - useSqlite, - sqliteFtsRequested, - sqliteFtsNormalize, - sqliteFtsProfile, - sqliteFtsWeights, - bm25K1, - bm25B, - postingsConfig, - queryTokens, - phraseNgramSet, - phraseRange, - filters, - topN: argv.n, - annEnabled, - vectorAnnState, - vectorAnnUsed, - buildCandidateSetSqlite, - getTokenIndexForQuery, - rankSqliteFts, - rankVectorAnnSqlite -}); -// --- SEARCH BM25 TOKENS/PHRASES --- - -/** - * Build a compact search hit payload for tooling. 
- * @param {object} hit - * @returns {object} - */ -function compactHit(hit) { - if (!hit || typeof hit !== 'object') return hit; - const compact = {}; - const fields = [ - 'id', - 'file', - 'start', - 'end', - 'startLine', - 'endLine', - 'ext', - 'kind', - 'name', - 'headline', - 'score', - 'scoreType', - 'sparseScore', - 'sparseType', - 'annScore', - 'annSource', - 'annType' - ]; - for (const field of fields) { - if (hit[field] !== undefined) compact[field] = hit[field]; - } - return compact; -} - - -// --- MAIN --- -(async () => { - let cacheHit = false; - let cacheKey = null; - let cacheSignature = null; - let cacheData = null; - let cachedPayload = null; - - if (queryCacheEnabled) { - const signature = getIndexSignature(); - cacheSignature = JSON.stringify(signature); - const cacheKeyInfo = buildQueryCacheKey(); - cacheKey = cacheKeyInfo.key; - cacheData = loadQueryCache(queryCachePath); - const entry = cacheData.entries.find((e) => e.key === cacheKey && e.signature === cacheSignature); - if (entry) { - const ttl = Number.isFinite(Number(entry.ttlMs)) ? Number(entry.ttlMs) : queryCacheTtlMs; - if (!ttl || (Date.now() - entry.ts) <= ttl) { - cachedPayload = entry.payload || null; - if (cachedPayload) { - const hasCode = !runCode || Array.isArray(cachedPayload.code); - const hasProse = !runProse || Array.isArray(cachedPayload.prose); - const hasRecords = !runRecords || Array.isArray(cachedPayload.records); - if (hasCode && hasProse && hasRecords) { - cacheHit = true; - entry.ts = Date.now(); - } - } - } - } - } - - const needsEmbedding = !cacheHit && annEnabled && ( - (runProse && (idxProse.denseVec?.vectors?.length || vectorAnnState.prose.available)) || - (runCode && (idxCode.denseVec?.vectors?.length || vectorAnnState.code.available)) || - (runRecords && idxRecords.denseVec?.vectors?.length) - ); - const embeddingCache = new Map(); - const getEmbeddingForModel = async (modelId, dims) => { - if (!modelId) return null; - const cacheKey = useStubEmbeddings ? `${modelId}:${dims || 'default'}` : modelId; - if (embeddingCache.has(cacheKey)) return embeddingCache.get(cacheKey); - const embedding = await getQueryEmbedding({ - text: embeddingQueryText, - modelId, - dims, - modelDir: modelConfig.dir, - useStub: useStubEmbeddings - }); - embeddingCache.set(cacheKey, embedding); - return embedding; - }; - const queryEmbeddingCode = needsEmbedding && runCode && (idxCode.denseVec?.vectors?.length || vectorAnnState.code.available) - ? await getEmbeddingForModel(modelIdForCode, idxCode.denseVec?.dims || null) - : null; - const queryEmbeddingProse = needsEmbedding && runProse && (idxProse.denseVec?.vectors?.length || vectorAnnState.prose.available) - ? await getEmbeddingForModel(modelIdForProse, idxProse.denseVec?.dims || null) - : null; - const queryEmbeddingRecords = needsEmbedding && runRecords && idxRecords.denseVec?.vectors?.length - ? await getEmbeddingForModel(modelIdForRecords, idxRecords.denseVec?.dims || null) - : null; - const proseHits = cacheHit && cachedPayload - ? (cachedPayload.prose || []) - : (runProse ? searchPipeline(idxProse, 'prose', queryEmbeddingProse) : []); - const codeHits = cacheHit && cachedPayload - ? (cachedPayload.code || []) - : (runCode ? searchPipeline(idxCode, 'code', queryEmbeddingCode) : []); - const recordHits = cacheHit && cachedPayload - ? (cachedPayload.records || []) - : (runRecords ? searchPipeline(idxRecords, 'records', queryEmbeddingRecords) : []); - const annBackend = vectorAnnEnabled && (vectorAnnUsed.code || vectorAnnUsed.prose) - ? 
'sqlite-extension' - : 'js'; - - // Output - if (jsonOutput) { - // Full JSON - const memory = process.memoryUsage(); - console.log(JSON.stringify({ - backend: backendLabel, - prose: jsonCompact ? proseHits.map(compactHit) : proseHits, - code: jsonCompact ? codeHits.map(compactHit) : codeHits, - records: jsonCompact ? recordHits.map(compactHit) : recordHits, - stats: { - elapsedMs: Date.now() - t0, - annEnabled, - annMode: vectorExtension.annMode, - annBackend, - annExtension: vectorAnnEnabled ? { - provider: vectorExtension.provider, - table: vectorExtension.table, - available: { - code: vectorAnnState.code.available, - prose: vectorAnnState.prose.available, - records: vectorAnnState.records.available - } - } : null, - models: { - code: modelIdForCode, - prose: modelIdForProse, - records: modelIdForRecords - }, - cache: { - enabled: queryCacheEnabled, - hit: cacheHit, - key: cacheKey - }, - memory: { - rss: memory.rss, - heapTotal: memory.heapTotal, - heapUsed: memory.heapUsed, - external: memory.external, - arrayBuffers: memory.arrayBuffers - } - } - }, null, 2)); - } - - if (!jsonOutput) { - let showProse = runProse ? argv.n : 0; - let showCode = runCode ? argv.n : 0; - let showRecords = runRecords ? argv.n : 0; - - if (runProse && runCode) { - if (proseHits.length < argv.n) { - showCode += showProse; - } - if (codeHits.length < argv.n) { - showProse += showCode; - } - } - - // Human output, enhanced formatting and summaries - if (runProse) { - console.log(color.bold(`\n===== 📖 Markdown Results (${backendLabel}) =====`)); - const summaryState = { lastCount: 0 }; - proseHits.slice(0, showProse).forEach((h, i) => { - if (i < 2) { - process.stdout.write(formatFullChunk({ - chunk: h, - index: i, - mode: 'prose', - score: h.score, - scoreType: h.scoreType, - color, - queryTokens, - rx, - matched: argv.matched, - rootDir: ROOT, - summaryState - })); - } else { - process.stdout.write(formatShortChunk({ - chunk: h, - index: i, - mode: 'prose', - score: h.score, - scoreType: h.scoreType, - color, - queryTokens, - rx, - matched: argv.matched - })); - } - }); - console.log('\n'); - } - - if (runCode) { - console.log(color.bold(`===== 🔨 Code Results (${backendLabel}) =====`)); - const summaryState = { lastCount: 0 }; - codeHits.slice(0, showCode).forEach((h, i) => { - if (i < 1) { - process.stdout.write(formatFullChunk({ - chunk: h, - index: i, - mode: 'code', - score: h.score, - scoreType: h.scoreType, - color, - queryTokens, - rx, - matched: argv.matched, - rootDir: ROOT, - summaryState - })); - } else { - process.stdout.write(formatShortChunk({ - chunk: h, - index: i, - mode: 'code', - score: h.score, - scoreType: h.scoreType, - color, - queryTokens, - rx, - matched: argv.matched - })); - } - }); - console.log('\n'); - } - - if (runRecords) { - console.log(color.bold(`===== 🧾 Records Results (${backendLabel}) =====`)); - recordHits.slice(0, showRecords).forEach((h, i) => { - if (i < 2) { - process.stdout.write(formatFullChunk({ - chunk: h, - index: i, - mode: 'records', - score: h.score, - scoreType: h.scoreType, - color, - queryTokens, - rx, - matched: argv.matched, - rootDir: null, - summaryState: null - })); - } else { - process.stdout.write(formatShortChunk({ - chunk: h, - index: i, - mode: 'records', - score: h.score, - scoreType: h.scoreType, - color, - queryTokens, - rx, - matched: argv.matched - })); - } - }); - console.log('\n'); - } - - // Optionally stats - if (argv.stats) { - const cacheTag = queryCacheEnabled ? (cacheHit ? 
'cache=hit' : 'cache=miss') : 'cache=off'; - const statsParts = [ - `prose chunks=${idxProse.chunkMeta.length}`, - `code chunks=${idxCode.chunkMeta.length}`, - runRecords ? `records chunks=${idxRecords.chunkMeta.length}` : null, - `(${cacheTag})` - ].filter(Boolean); - console.log(color.gray(`Stats: ${statsParts.join(', ')}`)); - } - } - - /* ---------- Update .repoMetrics and .searchHistory ---------- */ - const metricsPath = path.join(metricsDir, 'metrics.json'); - const historyPath = path.join(metricsDir, 'searchHistory'); - const noResultPath = path.join(metricsDir, 'noResultQueries'); - await fs.mkdir(path.dirname(metricsPath), { recursive: true }); - - let metrics = {}; - try { - metrics = JSON.parse(await fs.readFile(metricsPath, 'utf8')); - } catch { - metrics = {}; - } - const inc = (f, key) => { - if (!metrics[f]) metrics[f] = { md: 0, code: 0, records: 0, terms: [] }; - metrics[f][key] = (metrics[f][key] || 0) + 1; - queryTokens.forEach((t) => { - if (!metrics[f].terms.includes(t)) metrics[f].terms.push(t); - }); - }; - proseHits.forEach((h) => inc(h.file, 'md')); - codeHits.forEach((h) => inc(h.file, 'code')); - recordHits.forEach((h) => inc(h.file, 'records')); - await fs.writeFile(metricsPath, JSON.stringify(metrics) + '\n'); - - await fs.appendFile( - historyPath, - JSON.stringify({ - time: new Date().toISOString(), - query, - mdFiles: proseHits.length, - codeFiles: codeHits.length, - recordFiles: recordHits.length, - ms: Date.now() - t0, - cached: cacheHit, - }) + '\n' - ); - - if (proseHits.length === 0 && codeHits.length === 0 && recordHits.length === 0) { - await fs.appendFile( - noResultPath, - JSON.stringify({ time: new Date().toISOString(), query }) + '\n' - ); - } - - if (queryCacheEnabled && cacheKey) { - if (!cacheData) cacheData = { version: 1, entries: [] }; - if (!cacheHit) { - cacheData.entries = cacheData.entries.filter((entry) => entry.key !== cacheKey); - cacheData.entries.push({ - key: cacheKey, - ts: Date.now(), - ttlMs: queryCacheTtlMs, - signature: cacheSignature, - meta: { - query, - backend: backendLabel - }, - payload: { - prose: proseHits, - code: codeHits, - records: recordHits - } - }); - } - pruneQueryCache(cacheData, queryCacheMaxEntries); - try { - await fs.mkdir(path.dirname(queryCachePath), { recursive: true }); - await fs.writeFile(queryCachePath, JSON.stringify(cacheData, null, 2)); - } catch {} - } -})(); diff --git a/src/search/embedding.js b/src/search/embedding.js deleted file mode 100644 index 466c0bbd8..000000000 --- a/src/search/embedding.js +++ /dev/null @@ -1,42 +0,0 @@ -import fs from 'node:fs'; -import { stubEmbedding } from '../shared/embedding.js'; - -const embedderCache = new Map(); - -async function getEmbedder(modelId, modelDir) { - if (embedderCache.has(modelId)) return embedderCache.get(modelId); - const { pipeline, env } = await import('@xenova/transformers'); - if (modelDir) { - try { - fs.mkdirSync(modelDir, { recursive: true }); - } catch {} - env.cacheDir = modelDir; - } - const embedder = await pipeline('feature-extraction', modelId); - embedderCache.set(modelId, embedder); - return embedder; -} - -/** - * Compute a query embedding using the configured model. - * Returns null when embeddings are unavailable. 
- * @param {object} options
- * @param {string} options.text
- * @param {string} options.modelId
- * @param {number} options.dims
- * @param {string} options.modelDir
- * @param {boolean} options.useStub
- * @returns {Promise}
- */
-export async function getQueryEmbedding({ text, modelId, dims, modelDir, useStub }) {
-  if (useStub) {
-    return stubEmbedding(text, dims);
-  }
-  try {
-    const embedder = await getEmbedder(modelId, modelDir);
-    const output = await embedder(text, { pooling: 'mean', normalize: true });
-    return Array.from(output.data);
-  } catch {
-    return null;
-  }
-}
diff --git a/src/search/filters.js b/src/search/filters.js
deleted file mode 100644
index 4e2c5e6bf..000000000
--- a/src/search/filters.js
+++ /dev/null
@@ -1,66 +0,0 @@
-import { parseJson } from './query-cache.js';
-
-/**
- * Normalize extension filters into a lowercase list.
- * @param {string|string[]|null|undefined} extArg
- * @returns {string[]|null}
- */
-export function normalizeExtFilter(extArg) {
-  const entries = Array.isArray(extArg) ? extArg : (extArg ? [extArg] : []);
-  if (!entries.length) return null;
-  const normalized = [];
-  for (const entry of entries) {
-    String(entry || '')
-      .split(/[,\s]+/)
-      .map((raw) => raw.trim())
-      .filter(Boolean)
-      .forEach((raw) => {
-        let value = raw.toLowerCase();
-        value = value.replace(/^\*+/, '');
-        if (!value) return;
-        if (!value.startsWith('.')) value = `.${value}`;
-        normalized.push(value);
-      });
-  }
-  return normalized.length ? Array.from(new Set(normalized)) : null;
-}
-
-/**
- * Parse --meta and --meta-json into a normalized filter list.
- * @param {string|string[]|null|undefined} metaArg
- * @param {string|string[]|null|undefined} metaJsonArg
- * @returns {Array<{key:string,value:any}>|null}
- */
-export function parseMetaFilters(metaArg, metaJsonArg) {
-  const filters = [];
-  const pushFilter = (rawKey, rawValue) => {
-    const key = String(rawKey || '').trim();
-    if (!key) return;
-    const value = rawValue === undefined ? null : rawValue;
-    filters.push({ key, value });
-  };
-  const handleEntry = (entry) => {
-    const text = String(entry || '').trim();
-    if (!text) return;
-    const split = text.split('=');
-    const key = split.shift();
-    const value = split.length ? split.join('=').trim() : null;
-    pushFilter(key, value === '' ? null : value);
-  };
-  const metaEntries = Array.isArray(metaArg) ? metaArg : (metaArg ? [metaArg] : []);
-  for (const entry of metaEntries) handleEntry(entry);
-  const metaJsonEntries = Array.isArray(metaJsonArg) ? metaJsonArg : (metaJsonArg ? [metaJsonArg] : []);
-  for (const entry of metaJsonEntries) {
-    const parsed = parseJson(entry, null);
-    if (!parsed) continue;
-    if (Array.isArray(parsed)) {
-      parsed.forEach((item) => {
-        if (!item || typeof item !== 'object') return;
-        Object.entries(item).forEach(([key, value]) => pushFilter(key, value));
-      });
-    } else if (typeof parsed === 'object') {
-      Object.entries(parsed).forEach(([key, value]) => pushFilter(key, value));
-    }
-  }
-  return filters.length ? filters : null;
-}
diff --git a/src/search/fts.js b/src/search/fts.js
deleted file mode 100644
index 631c42584..000000000
--- a/src/search/fts.js
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- * Resolve FTS5 bm25 weights from a profile or config override.
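// Behavior sketch for the filter parsers in src/search/filters.js above
// (inputs illustrative):
// normalizeExtFilter(['JS, *.ts'])         // -> ['.js', '.ts']
// parseMetaFilters('team=search', null)    // -> [{ key: 'team', value: 'search' }]
// parseMetaFilters(null, '{"env":"prod"}') // -> [{ key: 'env', value: 'prod' }]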
- * @param {string} profile - * @param {object|number[]|null} config - * @returns {number[]} - */ -export function resolveFtsWeights(profile, config) { - const profiles = { - balanced: { file: 0.2, name: 1.5, kind: 0.6, headline: 2.0, tokens: 1.0 }, - headline: { file: 0.1, name: 1.2, kind: 0.4, headline: 3.0, tokens: 1.0 }, - name: { file: 0.2, name: 2.5, kind: 0.8, headline: 1.2, tokens: 1.0 } - }; - const base = profiles[profile] || profiles.balanced; - - if (Array.isArray(config)) { - const values = config.map((v) => Number(v)).filter((v) => Number.isFinite(v)); - if (values.length >= 6) return values.slice(0, 6); - if (values.length === 5) return [0, ...values]; - } else if (config && typeof config === 'object') { - const merged = { ...base }; - for (const key of ['file', 'name', 'kind', 'headline', 'tokens']) { - if (Number.isFinite(Number(config[key]))) merged[key] = Number(config[key]); - } - return [0, merged.file, merged.name, merged.kind, merged.headline, merged.tokens]; - } - - return [0, base.file, base.name, base.kind, base.headline, base.tokens]; -} - -/** - * Build a bm25(chunks_fts, ...) SQL expression from weights. - * @param {number[]} weights - * @returns {string} - */ -export function buildFtsBm25Expr(weights) { - const safe = weights.map((val) => (Number.isFinite(val) ? val : 1)); - return `bm25(chunks_fts, ${safe.join(', ')})`; -} diff --git a/src/search/pipeline.js b/src/search/pipeline.js deleted file mode 100644 index 54cd95337..000000000 --- a/src/search/pipeline.js +++ /dev/null @@ -1,256 +0,0 @@ -import { filterChunks } from './output.js'; -import { rankBM25, rankDenseVectors, rankMinhash } from './rankers.js'; -import { extractNgrams, tri } from '../shared/tokenize.js'; - -/** - * Create a search pipeline runner bound to a shared context. - * @param {object} context - * @returns {(idx:object, mode:'code'|'prose'|'records', queryEmbedding:number[]|null)=>Array} - */ -export function createSearchPipeline(context) { - const { - useSqlite, - sqliteFtsRequested, - sqliteFtsNormalize, - sqliteFtsProfile, - sqliteFtsWeights, - bm25K1, - bm25B, - postingsConfig, - queryTokens, - phraseNgramSet, - phraseRange, - filters, - topN, - annEnabled, - vectorAnnState, - vectorAnnUsed, - buildCandidateSetSqlite, - getTokenIndexForQuery, - rankSqliteFts, - rankVectorAnnSqlite - } = context; - - /** - * Build a candidate set from file-backed indexes (or SQLite). 
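// Worked example for resolveFtsWeights/buildFtsBm25Expr above: with the
// 'headline' profile and no override, resolveFtsWeights('headline', null)
// returns [0, 0.1, 1.2, 0.4, 3.0, 1.0] (the leading 0 appears to zero-weight
// the table's first column), and buildFtsBm25Expr renders that as
//   bm25(chunks_fts, 0, 0.1, 1.2, 0.4, 3, 1)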
- * @param {object} idx - * @param {string[]} tokens - * @param {'code'|'prose'|'records'} mode - * @returns {Set|null} - */ - function buildCandidateSet(idx, tokens, mode) { - if (useSqlite && (mode === 'code' || mode === 'prose')) { - return buildCandidateSetSqlite(mode, tokens); - } - - const candidates = new Set(); - let matched = false; - - if (postingsConfig.enablePhraseNgrams !== false && idx.phraseNgrams?.vocab && idx.phraseNgrams?.postings) { - const vocabIndex = new Map(idx.phraseNgrams.vocab.map((t, i) => [t, i])); - const ngrams = extractNgrams(tokens, postingsConfig.phraseMinN, postingsConfig.phraseMaxN); - for (const ng of ngrams) { - const hit = vocabIndex.get(ng); - if (hit === undefined) continue; - const posting = idx.phraseNgrams.postings[hit] || []; - posting.forEach((id) => candidates.add(id)); - matched = matched || posting.length > 0; - } - } - - if (postingsConfig.enableChargrams !== false && idx.chargrams?.vocab && idx.chargrams?.postings) { - const vocabIndex = new Map(idx.chargrams.vocab.map((t, i) => [t, i])); - for (const token of tokens) { - for (let n = postingsConfig.chargramMinN; n <= postingsConfig.chargramMaxN; n++) { - for (const gram of tri(token, n)) { - const hit = vocabIndex.get(gram); - if (hit === undefined) continue; - const posting = idx.chargrams.postings[hit] || []; - posting.forEach((id) => candidates.add(id)); - matched = matched || posting.length > 0; - } - } - } - } - - return matched ? candidates : null; - } - - function getPhraseMatchInfo(chunk, phraseSet, range) { - if (!phraseSet || !phraseSet.size || !chunk) return { matches: 0 }; - let ngrams = Array.isArray(chunk.ngrams) && chunk.ngrams.length ? chunk.ngrams : null; - if (!ngrams && Array.isArray(chunk.tokens) && range?.min && range?.max) { - ngrams = extractNgrams(chunk.tokens, range.min, range.max); - } - if (!ngrams || !ngrams.length) return { matches: 0 }; - let matches = 0; - for (const ng of ngrams) { - if (phraseSet.has(ng)) matches += 1; - } - return { matches }; - } - - /** - * Execute the full search pipeline for a mode. - * @param {object} idx - * @param {'code'|'prose'|'records'} mode - * @param {number[]|null} queryEmbedding - * @returns {Array} - */ - return function runSearch(idx, mode, queryEmbedding) { - const meta = idx.chunkMeta; - const sqliteEnabledForMode = useSqlite && (mode === 'code' || mode === 'prose'); - - // Filtering - const filteredMeta = filterChunks(meta, filters); - const allowedIdx = new Set(filteredMeta.map((c) => c.id)); - - const searchTopN = Math.max(1, Number(topN) || 1); - const expandedTopN = searchTopN * 3; - - // Main search: BM25 token match - let candidates = null; - let bmHits = []; - if (sqliteEnabledForMode && sqliteFtsRequested) { - bmHits = rankSqliteFts(idx, queryTokens, mode, expandedTopN, sqliteFtsNormalize); - candidates = bmHits.length ? new Set(bmHits.map((h) => h.idx)) : null; - } else { - const tokenIndexOverride = sqliteEnabledForMode ? 
getTokenIndexForQuery(queryTokens, mode) : null; - candidates = buildCandidateSet(idx, queryTokens, mode); - bmHits = rankBM25({ - idx, - tokens: queryTokens, - topN: expandedTopN, - tokenIndexOverride, - k1: bm25K1, - b: bm25B - }); - } - - // MinHash (embedding) ANN, if requested - let annHits = []; - let annSource = null; - if (annEnabled) { - if (queryEmbedding && vectorAnnState?.[mode]?.available) { - annHits = rankVectorAnnSqlite(mode, queryEmbedding, expandedTopN, candidates); - if (!annHits.length && candidates && candidates.size) { - annHits = rankVectorAnnSqlite(mode, queryEmbedding, expandedTopN, null); - } - if (annHits.length) { - vectorAnnUsed[mode] = true; - annSource = 'sqlite-vector'; - } - } - if (!annHits.length && queryEmbedding && idx.denseVec?.vectors?.length) { - annHits = rankDenseVectors(idx, queryEmbedding, expandedTopN, candidates); - if (annHits.length) annSource = 'dense'; - } - if (!annHits.length) { - annHits = rankMinhash(idx, queryTokens, expandedTopN); - if (annHits.length) annSource = 'minhash'; - } - } - - // Combine and dedup - const allHits = new Map(); - const sparseType = (sqliteEnabledForMode && sqliteFtsRequested) ? 'fts' : 'bm25'; - const recordHit = (idxVal, update) => { - const current = allHits.get(idxVal) || { bm25: null, fts: null, ann: null, annSource: null }; - allHits.set(idxVal, { ...current, ...update }); - }; - bmHits.forEach((h) => { - recordHit(h.idx, sparseType === 'fts' ? { fts: h.score } : { bm25: h.score }); - }); - annHits.forEach((h) => { - recordHit(h.idx, { ann: h.sim, annSource }); - }); - - const scored = [...allHits.entries()] - .filter(([idxVal]) => allowedIdx.has(idxVal)) - .map(([idxVal, scores]) => { - const sparseScore = scores.fts ?? scores.bm25 ?? null; - const annScore = scores.ann ?? null; - const sparseTypeValue = scores.fts != null ? 'fts' : (scores.bm25 != null ? 'bm25' : null); - let scoreType = null; - let score = null; - if (annScore != null && (sparseScore == null || annScore > sparseScore)) { - scoreType = 'ann'; - score = annScore; - } else if (sparseScore != null) { - scoreType = sparseTypeValue; - score = sparseScore; - } else { - scoreType = 'none'; - score = 0; - } - const chunk = meta[idxVal]; - if (!chunk) return null; - let phraseMatches = 0; - let phraseBoost = 0; - let phraseFactor = 0; - if (phraseNgramSet && phraseRange?.min && phraseRange?.max) { - const matchInfo = getPhraseMatchInfo(chunk, phraseNgramSet, phraseRange); - phraseMatches = matchInfo.matches; - if (phraseMatches) { - phraseFactor = Math.min(0.5, phraseMatches * 0.1); - phraseBoost = score * phraseFactor; - score += phraseBoost; - } - } - const scoreBreakdown = { - sparse: sparseScore != null ? { - type: sparseTypeValue, - score: sparseScore, - normalized: scores.fts != null ? sqliteFtsNormalize : null, - weights: scores.fts != null ? sqliteFtsWeights : null, - profile: scores.fts != null ? sqliteFtsProfile : null, - k1: scores.bm25 != null ? bm25K1 : null, - b: scores.bm25 != null ? bm25B : null - } : null, - ann: annScore != null ? { - score: annScore, - source: scores.annSource || null - } : null, - phrase: phraseNgramSet ? 
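// Worked example for the phrase boost above: with 3 matching phrase n-grams,
// phraseFactor = Math.min(0.5, 3 * 0.1) = 0.3, so a selected score of 2.0
// becomes 2.0 + 2.0 * 0.3 = 2.6. The factor caps at 0.5 once five or more
// phrase n-grams match.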
{ - matches: phraseMatches, - boost: phraseBoost, - factor: phraseFactor - } : null, - selected: { - type: scoreType, - score - } - }; - return { - idx: idxVal, - score, - scoreType, - scoreBreakdown, - chunk, - sparseScore, - sparseType: sparseTypeValue, - annScore, - annSource: scores.annSource || null - }; - }) - .filter(Boolean) - .sort((a, b) => (b.score - a.score) || (a.idx - b.idx)) - .slice(0, searchTopN); - - const ranked = scored - .map((entry) => ({ - ...entry.chunk, - score: entry.score, - scoreType: entry.scoreType, - sparseScore: entry.sparseScore, - sparseType: entry.sparseType, - annScore: entry.annScore, - annSource: entry.annSource, - annType: entry.annSource, - scoreBreakdown: entry.scoreBreakdown - })) - .filter(Boolean); - - return ranked; - }; -} diff --git a/src/shared/artifact-io.js b/src/shared/artifact-io.js new file mode 100644 index 000000000..d1dcb0825 --- /dev/null +++ b/src/shared/artifact-io.js @@ -0,0 +1,255 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { gunzipSync } from 'node:zlib'; + +const MAX_JSON_BYTES_ENV = Number(process.env.PAIROFCLEATS_MAX_JSON_BYTES); +export const MAX_JSON_BYTES = Number.isFinite(MAX_JSON_BYTES_ENV) && MAX_JSON_BYTES_ENV > 0 + ? Math.floor(MAX_JSON_BYTES_ENV) + : 512 * 1024 * 1024 - 1024; + +const toJsonTooLargeError = (filePath, size) => { + const err = new Error( + `JSON artifact too large to load (${size} bytes): ${filePath}` + ); + err.code = 'ERR_JSON_TOO_LARGE'; + return err; +}; + +const getBakPath = (filePath) => `${filePath}.bak`; + +const cleanupBak = (filePath) => { + const bakPath = getBakPath(filePath); + if (!fs.existsSync(bakPath)) return; + try { + fs.rmSync(bakPath, { force: true }); + } catch {} +}; + +const PIECE_CACHE_LIMIT = 8; +const pieceCache = new Map(); + +const buildCacheKey = (filePath) => { + try { + const stat = fs.statSync(filePath); + return `${filePath}:${stat.size}:${stat.mtimeMs}`; + } catch { + return null; + } +}; + +const readCache = (filePath) => { + const key = buildCacheKey(filePath); + if (!key) return null; + const cached = pieceCache.get(key); + if (!cached) return null; + pieceCache.delete(key); + pieceCache.set(key, cached); + return cached; +}; + +const writeCache = (filePath, value) => { + const key = buildCacheKey(filePath); + if (!key) return; + if (pieceCache.has(key)) pieceCache.delete(key); + pieceCache.set(key, value); + if (pieceCache.size > PIECE_CACHE_LIMIT) { + const firstKey = pieceCache.keys().next().value; + if (firstKey) pieceCache.delete(firstKey); + } +}; + +const shouldTreatAsTooLarge = (err) => { + if (!err) return false; + if (err.code === 'ERR_STRING_TOO_LONG') return true; + const message = typeof err.message === 'string' ? 
err.message : ''; + return message.includes('Invalid string length'); +}; + +const readBuffer = (targetPath, maxBytes) => { + const stat = fs.statSync(targetPath); + if (stat.size > maxBytes) { + throw toJsonTooLargeError(targetPath, stat.size); + } + return fs.readFileSync(targetPath); +}; + +export const readJsonFile = (filePath, { maxBytes = MAX_JSON_BYTES } = {}) => { + const parseBuffer = (buffer, sourcePath) => { + if (buffer.length > maxBytes) { + throw toJsonTooLargeError(sourcePath, buffer.length); + } + try { + return JSON.parse(buffer.toString('utf8')); + } catch (err) { + if (shouldTreatAsTooLarge(err)) { + throw toJsonTooLargeError(sourcePath, buffer.length); + } + throw err; + } + }; + const tryRead = (targetPath, options = {}) => { + const { gzip = false, cleanup = false } = options; + const buffer = readBuffer(targetPath, maxBytes); + const parsed = parseBuffer(gzip ? gunzipSync(buffer) : buffer, targetPath); + if (cleanup) cleanupBak(targetPath); + return parsed; + }; + const bakPath = getBakPath(filePath); + if (fs.existsSync(filePath)) { + try { + return tryRead(filePath, { cleanup: true }); + } catch (err) { + if (fs.existsSync(bakPath)) { + return tryRead(bakPath); + } + throw err; + } + } + if (filePath.endsWith('.json')) { + const gzPath = `${filePath}.gz`; + const gzBakPath = getBakPath(gzPath); + if (fs.existsSync(gzPath)) { + try { + return tryRead(gzPath, { gzip: true, cleanup: true }); + } catch (err) { + if (fs.existsSync(gzBakPath)) { + return tryRead(gzBakPath, { gzip: true }); + } + throw err; + } + } + } + if (fs.existsSync(bakPath)) { + return tryRead(bakPath); + } + if (filePath.endsWith('.json')) { + const gzBakPath = getBakPath(`${filePath}.gz`); + if (fs.existsSync(gzBakPath)) { + return tryRead(gzBakPath, { gzip: true }); + } + } + throw new Error(`Missing JSON artifact: ${filePath}`); +}; + +const readJsonFileCached = (filePath, { maxBytes = MAX_JSON_BYTES } = {}) => { + const cached = readCache(filePath); + if (cached) return cached; + const data = readJsonFile(filePath, { maxBytes }); + writeCache(filePath, data); + return data; +}; + +export const readJsonLinesArraySync = (filePath, { maxBytes = MAX_JSON_BYTES } = {}) => { + const cached = readCache(filePath); + if (cached) return cached; + const tryRead = (targetPath, cleanup = false) => { + const stat = fs.statSync(targetPath); + if (stat.size > maxBytes) { + throw toJsonTooLargeError(targetPath, stat.size); + } + let raw = ''; + try { + raw = fs.readFileSync(targetPath, 'utf8'); + } catch (err) { + if (shouldTreatAsTooLarge(err)) { + throw toJsonTooLargeError(targetPath, stat.size); + } + throw err; + } + if (!raw.trim()) return []; + const parsed = raw + .split(/\r?\n/) + .filter((line) => line.trim().length > 0) + .map((line) => JSON.parse(line)); + if (cleanup) cleanupBak(targetPath); + writeCache(targetPath, parsed); + return parsed; + }; + const bakPath = getBakPath(filePath); + if (fs.existsSync(filePath)) { + try { + return tryRead(filePath, true); + } catch (err) { + if (fs.existsSync(bakPath)) { + return tryRead(bakPath); + } + throw err; + } + } + if (fs.existsSync(bakPath)) { + return tryRead(bakPath); + } + throw new Error(`Missing JSONL artifact: ${filePath}`); +}; + +const readShardFiles = (dir, prefix) => { + if (!fs.existsSync(dir)) return []; + return fs + .readdirSync(dir) + .filter((name) => name.startsWith(prefix) && (name.endsWith('.json') || name.endsWith('.jsonl'))) + .sort() + .map((name) => path.join(dir, name)); +}; + +const existsOrBak = (filePath) => 
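// Resolution-order sketch for readJsonFile above (path illustrative):
// readJsonFile('/repo/index-code/index_state.json')
// - primary exists but fails to read/parse -> retry index_state.json.bak
// - primary missing -> index_state.json.gz, then index_state.json.bak,
//   then index_state.json.gz.bak, before throwing 'Missing JSON artifact'.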
fs.existsSync(filePath) || fs.existsSync(getBakPath(filePath)); + +export const loadChunkMeta = (dir, { maxBytes = MAX_JSON_BYTES } = {}) => { + const metaPath = path.join(dir, 'chunk_meta.meta.json'); + const partsDir = path.join(dir, 'chunk_meta.parts'); + if (existsOrBak(metaPath) || fs.existsSync(partsDir)) { + const meta = existsOrBak(metaPath) ? readJsonFile(metaPath, { maxBytes }) : null; + const parts = Array.isArray(meta?.parts) && meta.parts.length + ? meta.parts.map((name) => path.join(dir, name)) + : readShardFiles(partsDir, 'chunk_meta.part-'); + if (!parts.length) { + throw new Error(`Missing chunk_meta shard files in ${partsDir}`); + } + return parts.flatMap((partPath) => readJsonLinesArraySync(partPath, { maxBytes })); + } + const jsonlPath = path.join(dir, 'chunk_meta.jsonl'); + if (existsOrBak(jsonlPath)) { + return readJsonLinesArraySync(jsonlPath, { maxBytes }); + } + const jsonPath = path.join(dir, 'chunk_meta.json'); + if (existsOrBak(jsonPath)) { + return readJsonFile(jsonPath, { maxBytes }); + } + throw new Error(`Missing index artifact: chunk_meta.json`); +}; + +export const loadTokenPostings = (dir, { maxBytes = MAX_JSON_BYTES } = {}) => { + const metaPath = path.join(dir, 'token_postings.meta.json'); + const shardsDir = path.join(dir, 'token_postings.shards'); + if (existsOrBak(metaPath) || fs.existsSync(shardsDir)) { + const meta = existsOrBak(metaPath) ? readJsonFile(metaPath, { maxBytes }) : {}; + const shards = Array.isArray(meta?.parts) && meta.parts.length + ? meta.parts.map((name) => path.join(dir, name)) + : readShardFiles(shardsDir, 'token_postings.part-'); + if (!shards.length) { + throw new Error(`Missing token_postings shard files in ${shardsDir}`); + } + const vocab = []; + const postings = []; + for (const shardPath of shards) { + const shard = readJsonFileCached(shardPath, { maxBytes }); + const shardVocab = Array.isArray(shard?.vocab) ? shard.vocab : (Array.isArray(shard?.arrays?.vocab) ? shard.arrays.vocab : []); + const shardPostings = Array.isArray(shard?.postings) ? shard.postings : (Array.isArray(shard?.arrays?.postings) ? shard.arrays.postings : []); + vocab.push(...shardVocab); + postings.push(...shardPostings); + } + const docLengths = Array.isArray(meta?.docLengths) + ? meta.docLengths + : (Array.isArray(meta?.arrays?.docLengths) ? 
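// Layout sketch for loadChunkMeta above (shard names follow the prefixes in
// the code; counts illustrative):
//   chunk_meta.meta.json  -> { parts: ['chunk_meta.parts/chunk_meta.part-0000.jsonl', ...] }
//   chunk_meta.parts/     -> chunk_meta.part-0000.jsonl, chunk_meta.part-0001.jsonl, ...
// Parts are read as JSONL and concatenated in manifest (or sorted directory)
// order; a single chunk_meta.jsonl or chunk_meta.json is the fallback.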
meta.arrays.docLengths : []); + return { + ...meta, + vocab, + postings, + docLengths + }; + } + const jsonPath = path.join(dir, 'token_postings.json'); + if (existsOrBak(jsonPath)) { + return readJsonFile(jsonPath, { maxBytes }); + } + throw new Error(`Missing index artifact: token_postings.json`); +}; diff --git a/src/shared/artifact-schemas.js b/src/shared/artifact-schemas.js new file mode 100644 index 000000000..d4c189d0a --- /dev/null +++ b/src/shared/artifact-schemas.js @@ -0,0 +1,320 @@ +import Ajv from 'ajv'; + +const ajv = new Ajv({ + allErrors: true, + allowUnionTypes: true, + strict: true +}); + +const intId = { type: 'integer', minimum: 0 }; +const nullableString = { type: ['string', 'null'] }; +const nullableInt = { type: ['integer', 'null'], minimum: 0 }; + +const chunkMetaEntry = { + type: 'object', + required: ['id', 'start', 'end'], + properties: { + id: intId, + fileId: nullableInt, + start: intId, + end: intId, + startLine: nullableInt, + endLine: nullableInt, + kind: nullableString, + name: nullableString, + ext: nullableString + }, + additionalProperties: true +}; + +const postingEntry = { + type: 'array', + minItems: 2, + maxItems: 2, + items: [intId, intId] +}; + +const postingsList = { + type: 'array', + items: { type: 'array', items: postingEntry } +}; + +const vocabArray = { + type: 'array', + items: { type: 'string' } +}; + +const docLengthsArray = { + type: 'array', + items: intId +}; + +const graphNode = { + type: 'object', + required: ['id', 'out', 'in'], + properties: { + id: { type: 'string' }, + file: nullableString, + name: nullableString, + kind: nullableString, + chunkId: nullableString, + out: { type: 'array', items: { type: 'string' } }, + in: { type: 'array', items: { type: 'string' } } + }, + additionalProperties: true +}; + +const graphPayload = { + type: 'object', + required: ['nodeCount', 'edgeCount', 'nodes'], + properties: { + nodeCount: intId, + edgeCount: intId, + nodes: { type: 'array', items: graphNode } + }, + additionalProperties: true +}; + +const idPostingList = { + type: 'array', + items: { type: 'array', items: intId } +}; + +const denseVectorArray = { + type: 'array', + items: intId +}; + +const validators = { + chunk_meta: ajv.compile({ + type: 'array', + items: chunkMetaEntry + }), + file_meta: ajv.compile({ + type: 'array', + items: { + type: 'object', + required: ['id', 'file'], + properties: { + id: intId, + file: { type: 'string' }, + ext: nullableString + }, + additionalProperties: true + } + }), + repo_map: ajv.compile({ + type: 'array', + items: { + type: 'object', + required: ['file', 'name'], + properties: { + file: { type: 'string' }, + name: { type: 'string' }, + kind: nullableString, + signature: nullableString, + exported: { type: ['boolean', 'null'] } + }, + additionalProperties: true + } + }), + file_relations: ajv.compile({ + type: 'array', + items: { + type: 'object', + required: ['file', 'relations'], + properties: { + file: { type: 'string' }, + relations: { type: 'object' } + }, + additionalProperties: true + } + }), + token_postings: ajv.compile({ + type: 'object', + required: ['vocab', 'postings', 'docLengths'], + properties: { + vocab: vocabArray, + postings: postingsList, + docLengths: docLengthsArray, + avgDocLen: { type: 'number' }, + totalDocs: { type: 'integer' } + }, + additionalProperties: true + }), + field_postings: ajv.compile({ + type: 'object', + required: ['fields'], + properties: { + fields: { + type: 'object', + additionalProperties: { + type: 'object', + required: ['vocab', 'postings', 
'docLengths'], + properties: { + vocab: vocabArray, + postings: postingsList, + docLengths: docLengthsArray, + avgDocLen: { type: 'number' }, + totalDocs: { type: 'integer' } + }, + additionalProperties: true + } + } + }, + additionalProperties: true + }), + field_tokens: ajv.compile({ + type: 'array', + items: { + type: 'object', + properties: { + name: { type: 'array', items: { type: 'string' } }, + signature: { type: 'array', items: { type: 'string' } }, + doc: { type: 'array', items: { type: 'string' } }, + comment: { type: 'array', items: { type: 'string' } }, + body: { type: 'array', items: { type: 'string' } } + }, + additionalProperties: true + } + }), + minhash_signatures: ajv.compile({ + type: 'object', + required: ['signatures'], + properties: { + signatures: { + type: 'array', + items: { type: 'array', items: intId } + } + }, + additionalProperties: true + }), + dense_vectors: ajv.compile({ + type: 'object', + required: ['dims', 'vectors'], + properties: { + dims: { type: 'integer', minimum: 1 }, + model: nullableString, + scale: { type: 'number' }, + vectors: { type: 'array', items: denseVectorArray } + }, + additionalProperties: true + }), + dense_vectors_hnsw_meta: ajv.compile({ + type: 'object', + required: ['dims', 'count', 'space', 'm', 'efConstruction', 'efSearch'], + properties: { + version: { type: 'integer', minimum: 1 }, + generatedAt: nullableString, + model: nullableString, + dims: { type: 'integer', minimum: 1 }, + count: { type: 'integer', minimum: 0 }, + space: { type: 'string' }, + m: { type: 'integer', minimum: 1 }, + efConstruction: { type: 'integer', minimum: 1 }, + efSearch: { type: 'integer', minimum: 1 } + }, + additionalProperties: true + }), + phrase_ngrams: ajv.compile({ + type: 'object', + required: ['vocab', 'postings'], + properties: { + vocab: vocabArray, + postings: idPostingList + }, + additionalProperties: true + }), + chargram_postings: ajv.compile({ + type: 'object', + required: ['vocab', 'postings'], + properties: { + vocab: vocabArray, + postings: idPostingList + }, + additionalProperties: true + }), + filter_index: ajv.compile({ + type: 'object', + required: ['fileById', 'fileChunksById'], + properties: { + fileChargramN: { type: 'integer', minimum: 2 }, + fileById: { type: 'array', items: { type: 'string' } }, + fileChunksById: idPostingList, + byExt: { type: 'object' }, + byKind: { type: 'object' }, + byAuthor: { type: 'object' }, + byChunkAuthor: { type: 'object' }, + byVisibility: { type: 'object' }, + fileChargrams: { type: 'object' } + }, + additionalProperties: true + }), + pieces_manifest: ajv.compile({ + type: 'object', + required: ['version', 'pieces'], + properties: { + version: { type: 'integer' }, + generatedAt: nullableString, + updatedAt: nullableString, + mode: nullableString, + stage: nullableString, + pieces: { + type: 'array', + items: { + type: 'object', + required: ['type', 'name', 'format', 'path'], + properties: { + type: { type: 'string' }, + name: { type: 'string' }, + format: { type: 'string' }, + path: { type: 'string' }, + bytes: { type: ['integer', 'null'] }, + checksum: nullableString + }, + additionalProperties: true + } + } + }, + additionalProperties: true + }), + index_state: ajv.compile({ + type: 'object', + required: ['generatedAt', 'mode'], + properties: { + generatedAt: { type: 'string' }, + updatedAt: nullableString, + mode: { type: 'string' }, + stage: nullableString + }, + additionalProperties: true + }), + graph_relations: ajv.compile({ + type: 'object', + required: ['version', 'generatedAt', 
'callGraph', 'usageGraph', 'importGraph'], + properties: { + version: { type: 'integer', minimum: 1 }, + generatedAt: { type: 'string' }, + callGraph: graphPayload, + usageGraph: graphPayload, + importGraph: graphPayload + }, + additionalProperties: true + }) +}; + +const formatError = (error) => { + const path = error.instancePath || '/'; + const message = error.message || 'schema error'; + return `${path} ${message}`.trim(); +}; + +export function validateArtifact(name, data) { + const validator = validators[name]; + if (!validator) return { ok: true, errors: [] }; + const ok = Boolean(validator(data)); + const errors = ok || !validator.errors + ? [] + : validator.errors.map(formatError); + return { ok, errors }; +} diff --git a/src/shared/bench-progress.js b/src/shared/bench-progress.js new file mode 100644 index 000000000..77ff2e158 --- /dev/null +++ b/src/shared/bench-progress.js @@ -0,0 +1,29 @@ +export function formatShardFileProgress(entry, options = {}) { + const shardByLabel = options.shardByLabel instanceof Map ? options.shardByLabel : new Map(); + const lineTotal = options.lineTotal; + const count = Number.isFinite(entry.fileIndex) ? entry.fileIndex : entry.count; + const total = Number.isFinite(entry.fileTotal) ? entry.fileTotal : entry.total; + const pct = Number.isFinite(entry.pct) + ? entry.pct + : (Number.isFinite(count) && Number.isFinite(total) && total > 0) + ? (count / total) * 100 + : null; + const pctText = Number.isFinite(pct) ? `${pct.toFixed(1)}%` : null; + const shardLabel = entry.shardLabel; + const shardInfo = shardLabel ? shardByLabel.get(shardLabel) : null; + const shardText = shardInfo + ? `${shardInfo.index}/${shardInfo.total}` + : (shardLabel || null); + const shardPrefix = shardText ? `[shard ${shardText}]` : '[shard]'; + const countText = Number.isFinite(count) && Number.isFinite(total) + ? `${count}/${total}` + : null; + const lineText = Number.isFinite(lineTotal) && lineTotal > 0 + ? `lines ${lineTotal.toLocaleString()}` + : null; + const head = [shardPrefix, countText, pctText ? `(${pctText})` : null] + .filter(Boolean) + .join(' '); + const tail = [lineText, entry.file].filter(Boolean); + return tail.length ? 
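// Usage sketch for validateArtifact above (data illustrative); names without
// a registered validator pass through as ok.
// const { ok, errors } = validateArtifact('dense_vectors', { dims: 384, vectors: [[0, 3]] });
// if (!ok) console.warn(`dense_vectors invalid: ${errors.join('; ')}`);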
`${head} | ${tail.join(' | ')}` : head; +} diff --git a/src/shared/bundle-io.js b/src/shared/bundle-io.js new file mode 100644 index 000000000..6a75616ec --- /dev/null +++ b/src/shared/bundle-io.js @@ -0,0 +1,116 @@ +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { Packr, Unpackr } from 'msgpackr'; +import { sha1, checksumString } from './hash.js'; +import { stableStringify } from './stable-json.js'; + +const BUNDLE_FORMAT_TAG = 'pairofcleats.bundle'; +const BUNDLE_VERSION = 1; +const MSGPACK_EXTENSIONS = new Set(['.mpk', '.msgpack', '.msgpackr']); + +const packr = new Packr({ useRecords: false, structuredClone: true }); +const unpackr = new Unpackr({ useRecords: false }); + +const normalizeBundlePayload = (value) => { + if (Array.isArray(value)) { + return value.map((entry) => normalizeBundlePayload(entry)); + } + if (!value || typeof value !== 'object' || value.constructor !== Object) { + return value; + } + const out = {}; + for (const key of Object.keys(value).sort()) { + out[key] = normalizeBundlePayload(value[key]); + } + return out; +}; + +const checksumBundlePayload = async (payload) => ( + checksumString(stableStringify(payload)) +); + +export function normalizeBundleFormat(raw) { + if (typeof raw !== 'string') return 'json'; + const normalized = raw.trim().toLowerCase(); + if (normalized === 'msgpack' || normalized === 'msgpackr' || normalized === 'mpk') { + return 'msgpack'; + } + return 'json'; +} + +export function resolveBundleFilename(relKey, format) { + const ext = format === 'msgpack' ? 'mpk' : 'json'; + return `${sha1(relKey)}.${ext}`; +} + +export function resolveBundleFormatFromName(bundleName, fallback = 'json') { + if (typeof bundleName !== 'string' || !bundleName) return fallback; + const ext = path.extname(bundleName).toLowerCase(); + return MSGPACK_EXTENSIONS.has(ext) ? 'msgpack' : 'json'; +} + +export async function writeBundleFile({ bundlePath, bundle, format = 'json' }) { + const resolvedFormat = normalizeBundleFormat(format); + if (resolvedFormat === 'msgpack') { + const normalized = normalizeBundlePayload(bundle); + const checksum = await checksumBundlePayload(normalized); + const envelope = { + format: BUNDLE_FORMAT_TAG, + version: BUNDLE_VERSION, + checksum: checksum ? { algo: checksum.algo, value: checksum.value } : null, + payload: normalized + }; + const encoded = packr.pack(envelope); + await fs.writeFile(bundlePath, Buffer.from(encoded)); + return { + format: resolvedFormat, + checksum: checksum?.value ?? null, + checksumAlgo: checksum?.algo ?? 
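// Example output for formatShardFileProgress above (an en-US locale is
// assumed for the thousands separator):
// formatShardFileProgress(
//   { shardLabel: 'a', fileIndex: 3, fileTotal: 12, file: 'src/x.js' },
//   { lineTotal: 4200 }
// )
// -> '[shard a] 3/12 (25.0%) | lines 4,200 | src/x.js'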
null + }; + } + await fs.writeFile(bundlePath, `${JSON.stringify(bundle)}\n`); + return { format: resolvedFormat, checksum: null, checksumAlgo: null }; +} + +export async function readBundleFile(bundlePath, { format = null } = {}) { + const resolvedFormat = format || resolveBundleFormatFromName(bundlePath); + if (resolvedFormat === 'msgpack') { + const buffer = await fs.readFile(bundlePath); + const envelope = unpackr.unpack(buffer); + if (!envelope || typeof envelope !== 'object') { + return { ok: false, reason: 'invalid bundle envelope' }; + } + if (envelope.format !== BUNDLE_FORMAT_TAG || envelope.version !== BUNDLE_VERSION) { + return { ok: false, reason: 'unsupported bundle envelope' }; + } + const payload = envelope.payload; + if (!payload || !Array.isArray(payload.chunks)) { + return { ok: false, reason: 'invalid bundle payload' }; + } + const checksum = envelope.checksum?.value; + if (checksum) { + const normalized = normalizeBundlePayload(payload); + if (envelope.checksum?.algo === 'xxh64') { + const expected = await checksumBundlePayload(normalized); + if (!expected || expected.value !== checksum) { + return { ok: false, reason: 'bundle checksum mismatch' }; + } + return { ok: true, bundle: normalized }; + } + if (envelope.checksum?.algo === 'sha1') { + const expected = sha1(stableStringify(normalized)); + if (expected !== checksum) { + return { ok: false, reason: 'bundle checksum mismatch' }; + } + return { ok: true, bundle: normalized }; + } + } + return { ok: true, bundle: payload }; + } + const raw = await fs.readFile(bundlePath, 'utf8'); + const bundle = JSON.parse(raw); + if (!bundle || !Array.isArray(bundle.chunks)) { + return { ok: false, reason: 'invalid bundle' }; + } + return { ok: true, bundle }; +} diff --git a/src/shared/cache.js b/src/shared/cache.js new file mode 100644 index 000000000..e7ad09fcb --- /dev/null +++ b/src/shared/cache.js @@ -0,0 +1,136 @@ +import { LRUCache } from 'lru-cache'; + +const BYTES_PER_MB = 1024 * 1024; + +export const DEFAULT_CACHE_MB = { + fileText: 64, + summary: 32, + lint: 16, + complexity: 16, + gitMeta: 16 +}; + +export const DEFAULT_CACHE_TTL_MS = { + fileText: 0, + summary: 0, + lint: 0, + complexity: 0, + gitMeta: 0 +}; + +export const mbToBytes = (value) => { + const parsed = Number(value); + if (!Number.isFinite(parsed)) return 0; + return Math.max(0, Math.floor(parsed * BYTES_PER_MB)); +}; + +export const estimateStringBytes = (value) => { + if (typeof value !== 'string') return 0; + return Buffer.byteLength(value, 'utf8'); +}; + +export const estimateJsonBytes = (value) => { + try { + return Buffer.byteLength(JSON.stringify(value), 'utf8'); + } catch { + return 0; + } +}; + +export function createCacheReporter({ enabled = false, log = null } = {}) { + const entries = []; + return { + track(stats) { + if (stats) entries.push(stats); + }, + report() { + if (!enabled || !log || !entries.length) return; + log('Cache stats:'); + for (const stats of entries) { + const sizeMb = stats.maxSizeBytes ? (stats.maxSizeBytes / BYTES_PER_MB).toFixed(1) : 'n/a'; + const ttlMs = Number.isFinite(stats.ttlMs) ? stats.ttlMs : 0; + log(`- ${stats.name}: hits=${stats.hits}, misses=${stats.misses}, evictions=${stats.evictions}, sets=${stats.sets}, maxEntries=${stats.maxEntries ?? 'n/a'}, maxMb=${sizeMb}, ttlMs=${ttlMs}`); + } + } + }; +} + +export function createLruCache({ + name, + maxMb, + ttlMs, + maxEntries, + sizeCalculation, + reporter +}) { + const entryLimit = Number.isFinite(Number(maxEntries)) + ? 
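// Round-trip sketch for the bundle helpers above (path illustrative):
// await writeBundleFile({ bundlePath: '/tmp/ab.mpk', bundle: { chunks: [] }, format: 'msgpack' });
// const res = await readBundleFile('/tmp/ab.mpk'); // -> { ok: true, bundle: { chunks: [] } }
// A tampered payload instead yields { ok: false, reason: 'bundle checksum mismatch' }.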
Math.max(0, Math.floor(Number(maxEntries))) + : null; + const hasEntryLimit = entryLimit !== null; + const maxSizeBytes = hasEntryLimit ? 0 : mbToBytes(maxMb); + const ttlValue = Number.isFinite(Number(ttlMs)) ? Math.max(0, Number(ttlMs)) : 0; + + const stats = { + name, + hits: 0, + misses: 0, + evictions: 0, + sets: 0, + maxEntries: hasEntryLimit ? entryLimit : null, + maxSizeBytes, + ttlMs: ttlValue + }; + + if (reporter && typeof reporter.track === 'function') { + reporter.track(stats); + } + + if ((hasEntryLimit && entryLimit > 0) || maxSizeBytes > 0) { + const options = { + allowStale: false, + updateAgeOnGet: true, + dispose: (_value, _key, reason) => { + if (reason === 'evict') stats.evictions += 1; + } + }; + if (hasEntryLimit && entryLimit > 0) { + options.max = entryLimit; + } else { + options.maxSize = maxSizeBytes; + options.sizeCalculation = typeof sizeCalculation === 'function' + ? sizeCalculation + : estimateJsonBytes; + } + if (ttlValue > 0) options.ttl = ttlValue; + const cache = new LRUCache(options); + return { + get(key) { + const value = cache.get(key); + if (value === undefined) { + stats.misses += 1; + return null; + } + stats.hits += 1; + return value; + }, + set(key, value) { + stats.sets += 1; + cache.set(key, value); + }, + cache, + stats + }; + } + + return { + get() { + stats.misses += 1; + return null; + }, + set() { + stats.sets += 1; + }, + cache: null, + stats + }; +} diff --git a/src/shared/capabilities.js b/src/shared/capabilities.js new file mode 100644 index 000000000..333608867 --- /dev/null +++ b/src/shared/capabilities.js @@ -0,0 +1,44 @@ +import { tryRequire } from './optional-deps.js'; + +let cached = null; + +const check = (name, options) => tryRequire(name, options).ok; + +export function getCapabilities(options = {}) { + if (cached && options.refresh !== true) return cached; + const opts = { + verbose: options.verbose === true, + logger: options.logger + }; + cached = { + watcher: { + chokidar: check('chokidar', opts), + parcel: check('@parcel/watcher', opts) + }, + regex: { + re2: check('re2', opts), + re2js: check('re2js', opts) + }, + hash: { + nodeRsXxhash: check('@node-rs/xxhash', opts), + wasmXxhash: check('xxhash-wasm', opts) + }, + compression: { + gzip: true, + zstd: check('@mongodb-js/zstd', opts) + }, + extractors: { + pdf: check('pdfjs-dist', opts), + docx: check('mammoth', opts) + }, + mcp: { + sdk: check('@modelcontextprotocol/sdk', opts), + legacy: true + }, + externalBackends: { + tantivy: check('tantivy', opts), + lancedb: check('@lancedb/lancedb', opts) + } + }; + return cached; +} diff --git a/src/shared/cli-options.js b/src/shared/cli-options.js new file mode 100644 index 000000000..27283abeb --- /dev/null +++ b/src/shared/cli-options.js @@ -0,0 +1,123 @@ +import { validateConfig } from '../config/validate.js'; + +export const INDEX_BUILD_OPTIONS = { + mode: { type: 'string', default: 'all' }, + stage: { type: 'string' }, + dims: { type: 'number', default: 384 }, + threads: { type: 'number' }, + incremental: { type: 'boolean', default: false, alias: 'i' }, + 'stub-embeddings': { type: 'boolean', default: false }, + watch: { type: 'boolean', default: false }, + 'watch-poll': { type: 'number', default: 2000 }, + 'watch-debounce': { type: 'number', default: 500 }, + sqlite: { type: 'boolean' }, + 'debug-crash': { type: 'boolean', default: false }, + model: { type: 'string' }, + repo: { type: 'string' } +}; + +export const BENCH_OPTIONS = { + ann: { type: 'boolean' }, + 'no-ann': { type: 'boolean' }, + json: { type: 'boolean', 
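// Usage sketch for createLruCache above, wired to the exported defaults:
const reporter = createCacheReporter({ enabled: true, log: console.log });
const fileTextCache = createLruCache({
  name: 'fileText',
  maxMb: DEFAULT_CACHE_MB.fileText, // 64 MB byte budget, no entry cap
  ttlMs: DEFAULT_CACHE_TTL_MS.fileText, // 0 disables expiry
  sizeCalculation: estimateStringBytes,
  reporter
});
fileTextCache.set('src/a.js', 'file contents');
fileTextCache.get('src/a.js'); // counted as a hit
reporter.report();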
default: false }, + 'write-report': { type: 'boolean', default: false }, + build: { type: 'boolean', default: false }, + 'build-index': { type: 'boolean', default: false }, + 'build-sqlite': { type: 'boolean', default: false }, + incremental: { type: 'boolean', default: false }, + 'stub-embeddings': { type: 'boolean', default: false }, + 'index-profile': { type: 'string' }, + 'no-index-profile': { type: 'boolean', default: false }, + 'real-embeddings': { type: 'boolean', default: false }, + queries: { type: 'string' }, + backend: { type: 'string' }, + out: { type: 'string' }, + 'bm25-k1': { type: 'number' }, + 'bm25-b': { type: 'number' }, + 'fts-profile': { type: 'string' }, + 'fts-weights': { type: 'string' }, + repo: { type: 'string' }, + top: { type: 'number', default: 5 }, + limit: { type: 'number', default: 0 }, + 'heap-mb': { type: 'number' }, + 'query-concurrency': { type: 'number' }, + threads: { type: 'number' } +}; + +export function mergeCliOptions(...sets) { + const merged = {}; + for (const set of sets) { + if (!set || typeof set !== 'object') continue; + for (const [key, value] of Object.entries(set)) { + merged[key] = value; + } + } + return merged; +} + +const INDEX_BUILD_SCHEMA = { + type: 'object', + properties: { + mode: { type: 'string' }, + stage: { type: 'string' }, + dims: { type: 'number' }, + threads: { type: 'number' }, + incremental: { type: 'boolean' }, + watch: { type: 'boolean' }, + sqlite: { type: 'boolean' }, + model: { type: 'string' }, + repo: { type: 'string' } + } +}; + +const BENCH_SCHEMA = { + type: 'object', + properties: { + ann: { type: 'boolean' }, + 'no-ann': { type: 'boolean' }, + build: { type: 'boolean' }, + 'build-index': { type: 'boolean' }, + 'build-sqlite': { type: 'boolean' }, + incremental: { type: 'boolean' }, + 'stub-embeddings': { type: 'boolean' }, + 'index-profile': { type: 'string' }, + 'real-embeddings': { type: 'boolean' }, + backend: { type: 'string' }, + top: { type: 'number' }, + limit: { type: 'number' }, + 'bm25-k1': { type: 'number' }, + 'bm25-b': { type: 'number' }, + 'fts-profile': { type: 'string' }, + 'fts-weights': { type: 'string' }, + 'query-concurrency': { type: 'number' }, + threads: { type: 'number' }, + 'heap-mb': { type: 'number' } + } +}; + +const throwOnErrors = (label, errors) => { + if (!errors.length) return; + const message = errors.join('; '); + throw new Error(`${label} validation failed: ${message}`); +}; + +export function validateBuildArgs(argv) { + const result = validateConfig(INDEX_BUILD_SCHEMA, argv); + if (!result.ok) throwOnErrors('build-index args', result.errors); +} + +export function validateBenchArgs(argv) { + const result = validateConfig(BENCH_SCHEMA, argv); + if (!result.ok) throwOnErrors('bench args', result.errors); + const conflicts = []; + if (argv.ann && argv['no-ann']) { + conflicts.push('ann and no-ann cannot both be set'); + } + if (argv['stub-embeddings'] && argv['real-embeddings']) { + conflicts.push('stub-embeddings and real-embeddings cannot both be set'); + } + if (argv['index-profile'] && argv['no-index-profile']) { + conflicts.push('index-profile and no-index-profile cannot both be set'); + } + throwOnErrors('bench args', conflicts); +} diff --git a/src/shared/cli.js b/src/shared/cli.js new file mode 100644 index 000000000..7e3fe9d61 --- /dev/null +++ b/src/shared/cli.js @@ -0,0 +1,47 @@ +import path from 'node:path'; +import yargs from 'yargs/yargs'; +import { hideBin } from 'yargs/helpers'; + +const DEFAULT_PARSER_CONFIG = { + 'camel-case-expansion': false, + 
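// Conflict-check sketch for validateBenchArgs above:
// validateBenchArgs({ ann: true, 'no-ann': true });
// -> throws 'bench args validation failed: ann and no-ann cannot both be set'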
'dot-notation': false +}; + +/** + * Create a configured yargs instance for CLI tools. + * @param {{argv?:string[],scriptName?:string,usage?:string,options?:object,aliases?:object}} input + * @returns {import('yargs').Argv} + */ +export function createCli(input = {}) { + const { + argv = process.argv, + scriptName, + usage, + options = {}, + aliases = {} + } = input; + const name = scriptName || path.basename(argv[1] || 'cli'); + const mergedOptions = { ...options }; + if (!Object.prototype.hasOwnProperty.call(mergedOptions, 'profile')) { + mergedOptions.profile = { + type: 'string', + describe: 'Profile name from profiles/*.json' + }; + } + const parser = yargs(hideBin(argv)) + .scriptName(name) + .parserConfiguration(DEFAULT_PARSER_CONFIG) + .strict(false) + .help() + .alias('h', 'help') + .wrap(100); + if (usage) parser.usage(usage); + if (Object.keys(mergedOptions).length) parser.options(mergedOptions); + if (Object.keys(aliases).length) parser.alias(aliases); + parser.middleware((args) => { + if (args.profile) { + process.env.PAIROFCLEATS_PROFILE = String(args.profile).trim(); + } + }); + return parser; +} diff --git a/src/shared/concurrency.js b/src/shared/concurrency.js index c700070bd..248c02158 100644 --- a/src/shared/concurrency.js +++ b/src/shared/concurrency.js @@ -1,22 +1,95 @@ +import PQueue from 'p-queue'; + /** - * Run async work over items with a concurrency limit. + * Create shared task queues for IO, CPU, and embeddings work. + * @param {{ioConcurrency:number,cpuConcurrency:number,embeddingConcurrency?:number,ioPendingLimit?:number,cpuPendingLimit?:number,embeddingPendingLimit?:number}} input + * @returns {{io:PQueue,cpu:PQueue,embedding:PQueue}} + */ +export function createTaskQueues({ + ioConcurrency, + cpuConcurrency, + embeddingConcurrency, + ioPendingLimit, + cpuPendingLimit, + embeddingPendingLimit +}) { + const io = new PQueue({ concurrency: Math.max(1, Math.floor(ioConcurrency || 1)) }); + const cpu = new PQueue({ concurrency: Math.max(1, Math.floor(cpuConcurrency || 1)) }); + const embeddingLimit = Number.isFinite(Number(embeddingConcurrency)) + ? Math.max(1, Math.floor(Number(embeddingConcurrency))) + : Math.max(1, Math.floor(cpuConcurrency || 1)); + const embedding = new PQueue({ concurrency: embeddingLimit }); + const applyLimit = (queue, limit) => { + if (!Number.isFinite(limit) || limit <= 0) return; + queue.maxPending = Math.floor(limit); + }; + applyLimit(io, ioPendingLimit); + applyLimit(cpu, cpuPendingLimit); + applyLimit(embedding, embeddingPendingLimit); + return { io, cpu, embedding }; +} + +/** + * Run async work over items using a shared queue. + * @param {PQueue} queue * @param {Array} items - * @param {number} limit * @param {(item:any, index:number)=>Promise} worker - * @returns {Promise} + * @param {{collectResults?:boolean,onResult?:(result:any, index:number)=>Promise,retries?:number,retryDelayMs?:number}} [options] + * @returns {Promise} */ -export async function runWithConcurrency(items, limit, worker) { - if (!items.length) return []; - const results = new Array(items.length); - const workerCount = Math.max(1, Math.min(limit, items.length)); - let nextIndex = 0; - const runners = Array.from({ length: workerCount }, async () => { +export async function runWithQueue(queue, items, worker, options = {}) { + if (!items.length) return options.collectResults === false ? null : []; + const collectResults = options.collectResults !== false; + const onResult = typeof options.onResult === 'function' ? 
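// Usage sketch for createCli above (script name and options illustrative):
// const argv = createCli({
//   scriptName: 'pairofcleats-search',
//   usage: 'Usage: $0 <query> [options]',
//   options: { repo: { type: 'string' }, n: { type: 'number', default: 10 } }
// }).parse();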
options.onResult : null; + const retries = Number.isFinite(Number(options.retries)) ? Math.max(0, Math.floor(Number(options.retries))) : 0; + const retryDelayMs = Number.isFinite(Number(options.retryDelayMs)) ? Math.max(0, Math.floor(Number(options.retryDelayMs))) : 0; + const results = collectResults ? new Array(items.length) : null; + const pending = new Set(); + const maxPending = Number.isFinite(queue?.maxPending) ? queue.maxPending : null; + const enqueue = async (item, index) => { + if (maxPending) { + while (pending.size >= maxPending) { + await Promise.race(pending); + } + } + const task = queue.add(async () => { + let attempt = 0; + let result; while (true) { - const idx = nextIndex++; - if (idx >= items.length) break; - results[idx] = await worker(items[idx], idx); + try { + result = await worker(item, index); + break; + } catch (err) { + attempt += 1; + if (attempt > retries) throw err; + if (retryDelayMs > 0) { + await new Promise((resolve) => setTimeout(resolve, retryDelayMs)); + } + } } - }); - await Promise.all(runners); + if (collectResults) results[index] = result; + if (onResult) await onResult(result, index); + return result; + }); + pending.add(task); + task.finally(() => pending.delete(task)); + }; + for (let index = 0; index < items.length; index += 1) { + await enqueue(items[index], index); + } + await Promise.all(pending); return results; } + +/** + * Run async work over items with a per-call concurrency limit. + * @param {Array} items + * @param {number} limit + * @param {(item:any, index:number)=>Promise} worker + * @param {{collectResults?:boolean,onResult?:(result:any, index:number)=>Promise}} [options] + * @returns {Promise} + */ +export async function runWithConcurrency(items, limit, worker, options = {}) { + const queue = new PQueue({ concurrency: Math.max(1, Math.floor(limit || 1)) }); + return runWithQueue(queue, items, worker, options); +} diff --git a/src/shared/config.js b/src/shared/config.js new file mode 100644 index 000000000..a7f93f88c --- /dev/null +++ b/src/shared/config.js @@ -0,0 +1,17 @@ +export function isPlainObject(value) { + return value && typeof value === 'object' && !Array.isArray(value); +} + +export function mergeConfig(base, overrides) { + if (!isPlainObject(base)) return overrides; + if (!isPlainObject(overrides)) return base; + const next = { ...base }; + for (const [key, value] of Object.entries(overrides)) { + if (isPlainObject(value) && isPlainObject(next[key])) { + next[key] = mergeConfig(next[key], value); + } else { + next[key] = value; + } + } + return next; +} diff --git a/src/shared/dictionary.js b/src/shared/dictionary.js new file mode 100644 index 000000000..18d22f067 --- /dev/null +++ b/src/shared/dictionary.js @@ -0,0 +1,99 @@ +import { TextEncoder } from 'node:util'; + +const encoder = new TextEncoder(); + +const compareBytes = (query, bytes, start, end) => { + const entryLen = end - start; + const minLen = Math.min(query.length, entryLen); + for (let i = 0; i < minLen; i += 1) { + const diff = query[i] - bytes[start + i]; + if (diff < 0) return -1; + if (diff > 0) return 1; + } + if (query.length === entryLen) return 0; + return query.length < entryLen ? -1 : 1; +}; + +const compareEntry = (a, b) => { + const minLen = Math.min(a.bytes.length, b.bytes.length); + for (let i = 0; i < minLen; i += 1) { + const diff = a.bytes[i] - b.bytes[i]; + if (diff < 0) return -1; + if (diff > 0) return 1; + } + if (a.bytes.length === b.bytes.length) return 0; + return a.bytes.length < b.bytes.length ? 
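// Usage sketch for the queue helpers above; `paths` (an array of file paths)
// and the concurrency limits are assumptions for illustration.
import fsp from 'node:fs/promises';
const queues = createTaskQueues({ ioConcurrency: 8, cpuConcurrency: 4, ioPendingLimit: 64 });
const sizes = await runWithQueue(
  queues.io,
  paths,
  async (p) => (await fsp.stat(p)).size,
  { retries: 2, retryDelayMs: 100 }
);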
-1 : 1; +}; + +const normalizeWord = (word) => (typeof word === 'string' ? word : ''); + +export const createSharedDictionary = (words) => { + const list = Array.isArray(words) ? words : Array.from(words || []); + const entries = []; + let maxLen = 0; + for (const word of list) { + const normalized = normalizeWord(word); + if (!normalized) continue; + if (normalized.length > maxLen) maxLen = normalized.length; + entries.push({ word: normalized, bytes: encoder.encode(normalized) }); + } + if (!entries.length) return null; + entries.sort(compareEntry); + let totalBytes = 0; + for (const entry of entries) totalBytes += entry.bytes.length; + if (!Number.isFinite(totalBytes) || totalBytes <= 0) return null; + if (totalBytes > 0xffffffff) return null; + + const bytesBuffer = new SharedArrayBuffer(totalBytes); + const bytesView = new Uint8Array(bytesBuffer); + const offsetsBuffer = new SharedArrayBuffer((entries.length + 1) * Uint32Array.BYTES_PER_ELEMENT); + const offsetsView = new Uint32Array(offsetsBuffer); + + let offset = 0; + for (let i = 0; i < entries.length; i += 1) { + const entry = entries[i]; + offsetsView[i] = offset; + bytesView.set(entry.bytes, offset); + offset += entry.bytes.length; + } + offsetsView[entries.length] = offset; + + return { + bytes: bytesBuffer, + offsets: offsetsBuffer, + count: entries.length, + maxLen + }; +}; + +export const createSharedDictionaryView = (payload) => { + if (!payload?.bytes || !payload?.offsets) return null; + const bytes = new Uint8Array(payload.bytes); + const offsets = new Uint32Array(payload.offsets); + const countRaw = Number.isFinite(payload.count) ? payload.count : Math.max(0, offsets.length - 1); + const count = Math.max(0, Math.min(countRaw, offsets.length - 1)); + const maxLen = Number.isFinite(payload.maxLen) ? payload.maxLen : 0; + const has = (value) => { + if (typeof value !== 'string' || !value) return false; + if (maxLen && value.length > maxLen) return false; + const query = encoder.encode(value); + let low = 0; + let high = count - 1; + while (low <= high) { + const mid = (low + high) >> 1; + const start = offsets[mid]; + const end = offsets[mid + 1]; + const cmp = compareBytes(query, bytes, start, end); + if (cmp === 0) return true; + if (cmp < 0) high = mid - 1; + else low = mid + 1; + } + return false; + }; + return { + size: count, + has, + __sharedDict: true, + __maxTokenLength: maxLen + }; +}; diff --git a/src/shared/embedding.js b/src/shared/embedding.js index 32b5173eb..9c7f313c8 100644 --- a/src/shared/embedding.js +++ b/src/shared/embedding.js @@ -7,7 +7,8 @@ import crypto from 'node:crypto'; * @returns {number[]} */ export function stubEmbedding(text, dims) { - const safeDims = Number.isFinite(dims) && dims > 0 ? Math.floor(dims) : 512; + // Keep stub embeddings aligned with the default index dimensions. + const safeDims = Number.isFinite(dims) && dims > 0 ? 
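// Usage sketch for the shared dictionary above: build it once, post the
// SharedArrayBuffers to a worker, and rebuild a read-only view there
// (worker wiring omitted).
// const payload = createSharedDictionary(['alpha', 'beta', 'gamma']);
// const view = createSharedDictionaryView(payload);
// view.has('beta');  // true (binary search over the shared byte buffer)
// view.has('delta'); // false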
Math.floor(dims) : 384; const hash = crypto.createHash('sha256').update(text).digest(); let seed = 0; for (const byte of hash) seed = (seed * 31 + byte) >>> 0; diff --git a/src/shared/encoding.js b/src/shared/encoding.js new file mode 100644 index 000000000..baf0aca58 --- /dev/null +++ b/src/shared/encoding.js @@ -0,0 +1,82 @@ +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import chardet from 'chardet'; +import iconv from 'iconv-lite'; +import { sha1 } from './hash.js'; + +const utf8Decoder = new TextDecoder('utf-8', { fatal: true }); + +const normalizeEncoding = (value) => { + if (!value) return null; + return String(value).trim().replace(/_/g, '-').toLowerCase(); +}; + +const detectEncoding = (buffer) => { + if (!buffer || !buffer.length) return { encoding: null, confidence: null }; + try { + const detected = chardet.analyse(buffer) || []; + if (Array.isArray(detected) && detected.length) { + const best = detected[0]; + return { + encoding: normalizeEncoding(best?.name), + confidence: Number.isFinite(best?.confidence) ? best.confidence : null + }; + } + } catch {} + try { + const detected = normalizeEncoding(chardet.detect(buffer)); + return { encoding: detected, confidence: null }; + } catch {} + return { encoding: null, confidence: null }; +}; + +export const decodeTextBuffer = (buffer) => { + if (!buffer || !buffer.length) { + return { + text: '', + encoding: 'utf8', + usedFallback: false, + confidence: null + }; + } + try { + return { + text: utf8Decoder.decode(buffer), + encoding: 'utf8', + usedFallback: false, + confidence: null + }; + } catch {} + const { encoding: detected, confidence } = detectEncoding(buffer); + let encoding = detected || 'latin1'; + if (!iconv.encodingExists(encoding)) { + encoding = 'latin1'; + } + return { + text: iconv.decode(buffer, encoding), + encoding, + usedFallback: true, + confidence + }; +}; + +export const readTextFile = async (filePath) => { + const buffer = await fsPromises.readFile(filePath); + return decodeTextBuffer(buffer); +}; + +export const readTextFileWithHash = async (filePath, options = {}) => { + const buffer = options.buffer ?? await fsPromises.readFile(filePath); + const decoded = decodeTextBuffer(buffer); + const hash = sha1(buffer); + return { + ...decoded, + hash, + buffer + }; +}; + +export const readTextFileSync = (filePath) => { + const buffer = fs.readFileSync(filePath); + return decodeTextBuffer(buffer); +}; diff --git a/src/shared/env.js b/src/shared/env.js new file mode 100644 index 000000000..3b1471d1b --- /dev/null +++ b/src/shared/env.js @@ -0,0 +1,65 @@ +const TRUE_VALUES = new Set(['1', 'true', 'yes', 'on']); +const FALSE_VALUES = new Set(['0', 'false', 'no', 'off']); + +const normalizeString = (value) => { + if (typeof value !== 'string') return ''; + return value.trim(); +}; + +const parseBool = (value) => { + if (value == null) return null; + const normalized = String(value).trim().toLowerCase(); + if (TRUE_VALUES.has(normalized)) return true; + if (FALSE_VALUES.has(normalized)) return false; + return null; +}; + +const parseNumber = (value) => { + if (value == null || value === '') return null; + const parsed = Number(value); + return Number.isFinite(parsed) ? 
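/*
  Editor's note: illustrative sketch for src/shared/encoding.js; it is not part
  of the patch. The file path is a placeholder.

    import { decodeTextBuffer, readTextFile } from './encoding.js';

    // Strict UTF-8 is tried first; on failure the charset is detected with
    // chardet and decoded through iconv-lite.
    const decoded = decodeTextBuffer(Buffer.from([0xe9, 0x74, 0xe9])); // "été" in latin1
    // decoded.usedFallback === true; decoded.encoding is the detected charset or 'latin1'

    const fileText = await readTextFile('./legacy-latin1.txt');
    // => { text, encoding, usedFallback, confidence }
*/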
parsed : null; +}; + +export function getEnvConfig(env = process.env) { + return { + profile: normalizeString(env.PAIROFCLEATS_PROFILE), + cacheRoot: normalizeString(env.PAIROFCLEATS_CACHE_ROOT), + home: normalizeString(env.PAIROFCLEATS_HOME), + dictDir: normalizeString(env.PAIROFCLEATS_DICT_DIR), + model: normalizeString(env.PAIROFCLEATS_MODEL), + modelsDir: normalizeString(env.PAIROFCLEATS_MODELS_DIR), + toolingDir: normalizeString(env.PAIROFCLEATS_TOOLING_DIR), + toolingInstallScope: normalizeString(env.PAIROFCLEATS_TOOLING_INSTALL_SCOPE), + extensionsDir: normalizeString(env.PAIROFCLEATS_EXTENSIONS_DIR), + embeddings: normalizeString(env.PAIROFCLEATS_EMBEDDINGS), + debugCrash: parseBool(env.PAIROFCLEATS_DEBUG_CRASH), + threads: parseNumber(env.PAIROFCLEATS_THREADS), + bundleThreads: parseNumber(env.PAIROFCLEATS_BUNDLE_THREADS), + workerPool: normalizeString(env.PAIROFCLEATS_WORKER_POOL), + maxOldSpaceMb: parseNumber(env.PAIROFCLEATS_MAX_OLD_SPACE_MB), + uvThreadpoolSize: parseNumber(env.PAIROFCLEATS_UV_THREADPOOL_SIZE), + nodeOptions: normalizeString(env.PAIROFCLEATS_NODE_OPTIONS), + stage: normalizeString(env.PAIROFCLEATS_STAGE), + ftsProfile: normalizeString(env.PAIROFCLEATS_FTS_PROFILE), + vectorExtension: normalizeString(env.PAIROFCLEATS_VECTOR_EXTENSION), + verbose: parseBool(env.PAIROFCLEATS_VERBOSE), + progressFiles: parseBool(env.PAIROFCLEATS_PROGRESS_FILES), + progressLines: parseBool(env.PAIROFCLEATS_PROGRESS_LINES), + fileCacheMax: parseNumber(env.PAIROFCLEATS_FILE_CACHE_MAX), + summaryCacheMax: parseNumber(env.PAIROFCLEATS_SUMMARY_CACHE_MAX), + logFormat: normalizeString(env.PAIROFCLEATS_LOG_FORMAT), + logLevel: normalizeString(env.PAIROFCLEATS_LOG_LEVEL) + }; +} + +export function parseEnvBool(value) { + return parseBool(value); +} + +export function normalizeEnvString(value) { + return normalizeString(value); +} + +export function parseEnvNumber(value) { + return parseNumber(value); +} diff --git a/src/shared/error-codes.js b/src/shared/error-codes.js new file mode 100644 index 000000000..a16fc79af --- /dev/null +++ b/src/shared/error-codes.js @@ -0,0 +1,24 @@ +export const ERROR_CODES = Object.freeze({ + INVALID_REQUEST: 'INVALID_REQUEST', + NOT_FOUND: 'NOT_FOUND', + NO_INDEX: 'NO_INDEX', + INTERNAL: 'INTERNAL', + QUEUE_OVERLOADED: 'QUEUE_OVERLOADED', + TOOL_TIMEOUT: 'TOOL_TIMEOUT', + DOWNLOAD_VERIFY_FAILED: 'DOWNLOAD_VERIFY_FAILED', + ARCHIVE_UNSAFE: 'ARCHIVE_UNSAFE', + ARCHIVE_TOO_LARGE: 'ARCHIVE_TOO_LARGE' +}); + +export const isErrorCode = (value) => ( + typeof value === 'string' && Object.values(ERROR_CODES).includes(value) +); + +export const createError = (code, message, details = null) => { + const err = new Error(message || 'Error'); + err.code = code; + if (details && typeof details === 'object') { + Object.assign(err, details); + } + return err; +}; diff --git a/src/shared/file-stats.js b/src/shared/file-stats.js new file mode 100644 index 000000000..7240659e5 --- /dev/null +++ b/src/shared/file-stats.js @@ -0,0 +1,36 @@ +import fs from 'node:fs'; +import { runWithConcurrency } from './concurrency.js'; +import { toPosix } from './files.js'; + +export async function countFileLines(filePath) { + return new Promise((resolve) => { + let count = 0; + let sawData = false; + const stream = fs.createReadStream(filePath); + stream.on('data', (chunk) => { + sawData = sawData || chunk.length > 0; + for (let i = 0; i < chunk.length; i += 1) { + if (chunk[i] === 10) count += 1; + } + }); + stream.on('error', () => resolve(0)); + stream.on('end', () => resolve(sawData ? 
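/*
  Editor's note: illustrative sketch for src/shared/env.js and
  src/shared/error-codes.js; it is not part of the patch. The script name is a
  placeholder.

    import { getEnvConfig } from './env.js';
    import { ERROR_CODES, createError } from './error-codes.js';

    // PAIROFCLEATS_THREADS=8 PAIROFCLEATS_VERBOSE=yes node build-index.js
    const env = getEnvConfig();
    // env.threads === 8, env.verbose === true; unset string vars come back as ''

    if (!env.cacheRoot) {
      throw createError(ERROR_CODES.INVALID_REQUEST, 'Cache root not configured',
        { hint: 'set PAIROFCLEATS_CACHE_ROOT' });
    }
*/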
count + 1 : 0)); + }); +} + +export async function countLinesForEntries(entries, { concurrency = 8 } = {}) { + const lineCounts = new Map(); + if (!Array.isArray(entries) || entries.length === 0) return lineCounts; + await runWithConcurrency( + entries, + concurrency, + async (entry) => { + const rel = toPosix(entry.rel || entry.abs || ''); + if (!rel) return; + const lines = await countFileLines(entry.abs); + lineCounts.set(rel, lines); + }, + { collectResults: false } + ); + return lineCounts; +} diff --git a/src/shared/hash.js b/src/shared/hash.js index aa67f74eb..b2547a5b4 100644 --- a/src/shared/hash.js +++ b/src/shared/hash.js @@ -1,4 +1,27 @@ import crypto from 'node:crypto'; +import fs from 'node:fs'; +import xxhash from 'xxhash-wasm'; + +const XXHASH_HEX_WIDTH = 16; +let xxhashState = null; + +const loadXxhash = async () => { + if (!xxhashState) { + xxhashState = xxhash(); + } + return xxhashState; +}; + +const formatXxhashHex = (value) => { + if (typeof value === 'bigint') { + return value.toString(16).padStart(XXHASH_HEX_WIDTH, '0'); + } + if (typeof value === 'number') { + return Math.floor(value).toString(16).padStart(XXHASH_HEX_WIDTH, '0'); + } + if (typeof value === 'string') return value; + return ''; +}; /** * Compute a SHA1 hash hex string. @@ -8,3 +31,34 @@ import crypto from 'node:crypto'; export function sha1(str) { return crypto.createHash('sha1').update(str).digest('hex'); } + +/** + * Compute a SHA1 hash for a file on disk. + * @param {string} filePath + * @returns {Promise} + */ +export function sha1File(filePath) { + return new Promise((resolve, reject) => { + const hash = crypto.createHash('sha1'); + const stream = fs.createReadStream(filePath); + stream.on('error', reject); + stream.on('data', (chunk) => hash.update(chunk)); + stream.on('end', () => resolve(hash.digest('hex'))); + }); +} + +export async function checksumString(input) { + const { h64ToString } = await loadXxhash(); + return { algo: 'xxh64', value: h64ToString(input) }; +} + +export async function checksumFile(filePath) { + const { create64 } = await loadXxhash(); + return new Promise((resolve, reject) => { + const hasher = create64(); + const stream = fs.createReadStream(filePath); + stream.on('error', reject); + stream.on('data', (chunk) => hasher.update(chunk)); + stream.on('end', () => resolve({ algo: 'xxh64', value: formatXxhashHex(hasher.digest()) })); + }); +} diff --git a/src/shared/hash/xxhash-backend.js b/src/shared/hash/xxhash-backend.js new file mode 100644 index 000000000..39f4e44ca --- /dev/null +++ b/src/shared/hash/xxhash-backend.js @@ -0,0 +1,106 @@ +import fs from 'node:fs'; +import xxhashWasm from 'xxhash-wasm'; +import { tryRequire } from '../optional-deps.js'; + +const XXHASH_HEX_WIDTH = 16; +let wasmStatePromise = null; +let wasmBackendPromise = null; + +const loadWasmState = async () => { + if (!wasmStatePromise) { + wasmStatePromise = xxhashWasm(); + } + return wasmStatePromise; +}; + +export const formatXxhashHex = (value) => { + if (typeof value === 'bigint') { + return value.toString(16).padStart(XXHASH_HEX_WIDTH, '0'); + } + if (typeof value === 'number') { + return Math.floor(value).toString(16).padStart(XXHASH_HEX_WIDTH, '0'); + } + if (typeof value === 'string') { + const trimmed = value.startsWith('0x') ? 
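/*
  Editor's note: illustrative sketch for the hashing helpers in
  src/shared/hash.js; it is not part of the patch. File paths are placeholders.

    import { sha1, sha1File, checksumFile } from './hash.js';

    sha1('hello'); // synchronous hex SHA-1 of a string

    // Streaming SHA-1 for content identity vs. xxh64 for cheap change detection.
    const fileSha = await sha1File('./package.json');
    const quick = await checksumFile('./package.json');
    // quick => { algo: 'xxh64', value: '<16 hex chars>' }
*/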
value.slice(2) : value; + return trimmed.padStart(XXHASH_HEX_WIDTH, '0'); + } + return ''; +}; + +const createWasmBackend = async () => { + if (wasmBackendPromise) return wasmBackendPromise; + wasmBackendPromise = (async () => { + const { h64ToString, create64 } = await loadWasmState(); + return { + name: 'wasm', + hash64: async (input) => formatXxhashHex(h64ToString(input)), + hash64Stream: async (stream) => new Promise((resolve, reject) => { + const hasher = create64(); + stream.on('error', reject); + stream.on('data', (chunk) => hasher.update(chunk)); + stream.on('end', () => resolve(formatXxhashHex(hasher.digest()))); + }) + }; + })(); + return wasmBackendPromise; +}; + +const resolveNativeFns = (mod) => { + const hash64 = mod?.xxh64 || mod?.xxhash64 || mod?.hash64 || mod?.xxh64Raw; + const create64 = mod?.createXXHash64 || mod?.createXxh64 || mod?.createHash64 || mod?.create64; + return { hash64, create64 }; +}; + +const createNativeBackend = async (options = {}) => { + const result = tryRequire('@node-rs/xxhash', options); + if (!result.ok || !result.mod) return null; + const { hash64, create64 } = resolveNativeFns(result.mod); + if (typeof hash64 !== 'function') return null; + const base = { + name: 'native', + hash64: async (input) => formatXxhashHex(hash64(input)) + }; + if (typeof create64 === 'function') { + return { + ...base, + hash64Stream: async (stream) => new Promise((resolve, reject) => { + const hasher = create64(); + stream.on('error', reject); + stream.on('data', (chunk) => hasher.update(chunk)); + stream.on('end', () => resolve(formatXxhashHex(hasher.digest()))); + }) + }; + } + const wasmBackend = await createWasmBackend(); + return { + ...base, + hash64Stream: wasmBackend.hash64Stream + }; +}; + +const maybeLogFallback = (message, options = {}) => { + if (!options?.verbose && options?.verbose !== true) return; + const logger = typeof options.logger === 'function' ? options.logger : console.warn; + logger(`[hash] ${message}`); +}; + +export const resolveXxhashBackend = async ({ backend = 'auto', logger, verbose } = {}) => { + const normalized = typeof backend === 'string' ? backend.trim().toLowerCase() : 'auto'; + const options = { logger, verbose }; + if (normalized === 'native') { + const nativeBackend = await createNativeBackend(options); + if (nativeBackend) return nativeBackend; + maybeLogFallback('Native xxhash unavailable; falling back to wasm.', options); + return createWasmBackend(); + } + if (normalized === 'wasm') { + return createWasmBackend(); + } + const nativeBackend = await createNativeBackend(options); + if (nativeBackend) return nativeBackend; + return createWasmBackend(); +}; + +export const hash64Stream = (stream, backend) => backend.hash64Stream(stream); + +export const hashFileStream = (filePath) => fs.createReadStream(filePath); diff --git a/src/shared/hnsw.js b/src/shared/hnsw.js new file mode 100644 index 000000000..dbe01e5b1 --- /dev/null +++ b/src/shared/hnsw.js @@ -0,0 +1,220 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); + +const parseNodeMajor = () => { + const raw = process.versions?.node || ''; + const major = Number(String(raw).split('.')[0]); + return Number.isFinite(major) ? 
major : null; +}; + +const supportsHnswRuntime = () => { + const major = parseNodeMajor(); + if (!Number.isFinite(major)) return true; + return major < 24; +}; + +let warnedRuntimeUnsupported = false; +let warnedLoadFailure = false; +let warnedFallbackUsed = false; +const warnRuntimeUnsupported = () => { + if (warnedRuntimeUnsupported) return; + warnedRuntimeUnsupported = true; + console.warn(`[ann] HNSW disabled on Node ${process.versions.node}; use Node 20/22 or disable embeddings.hnsw.`); +}; + +const warnLoadFailure = (message) => { + if (warnedLoadFailure) return; + warnedLoadFailure = true; + console.warn(`[ann] HNSW index load failed; falling back to JS ANN. ${message || ''}`.trim()); +}; + +const warnFallbackUsed = (message) => { + if (warnedFallbackUsed) return; + warnedFallbackUsed = true; + console.warn(`[ann] HNSW primary index unreadable; using backup. ${message || ''}`.trim()); +}; + +const resolveHnswLib = () => { + if (!supportsHnswRuntime()) { + warnRuntimeUnsupported(); + return null; + } + try { + return require('hnswlib-node'); + } catch { + return null; + } +}; + +const getBakPath = (filePath) => `${filePath}.bak`; + +const resolveIndexPath = (indexPath) => { + if (!indexPath) return null; + if (fs.existsSync(indexPath)) { + return { path: indexPath, cleanup: true }; + } + const bakPath = getBakPath(indexPath); + if (fs.existsSync(bakPath)) { + return { path: bakPath, cleanup: false }; + } + return null; +}; + +const cleanupBak = (indexPath) => { + const bakPath = getBakPath(indexPath); + if (!fs.existsSync(bakPath)) return; + try { + fs.rmSync(bakPath, { force: true }); + } catch {} +}; + +const SPACES = new Set(['cosine', 'l2', 'ip']); + +const normalizeInt = (value, fallback) => { + const parsed = Number(value); + if (!Number.isFinite(parsed) || parsed <= 0) return fallback; + return Math.floor(parsed); +}; + +const normalizeSpace = (value) => { + if (typeof value !== 'string') return 'cosine'; + const trimmed = value.trim().toLowerCase(); + return SPACES.has(trimmed) ? trimmed : 'cosine'; +}; + +export function normalizeHnswConfig(raw = {}) { + if (raw === false) return { enabled: false }; + const config = raw && typeof raw === 'object' ? raw : {}; + return { + enabled: config.enabled !== false, + space: normalizeSpace(config.space), + m: normalizeInt(config.m, 16), + efConstruction: normalizeInt(config.efConstruction, 200), + efSearch: normalizeInt(config.efSearch, 64), + randomSeed: normalizeInt(config.randomSeed, 100), + allowReplaceDeleted: config.allowReplaceDeleted === true + }; +} + +export function resolveHnswPaths(indexDir) { + return { + indexPath: path.join(indexDir, 'dense_vectors_hnsw.bin'), + metaPath: path.join(indexDir, 'dense_vectors_hnsw.meta.json') + }; +} + +export function validateHnswMetaCompatibility({ denseVectors, hnswMeta } = {}) { + const warnings = []; + if (!denseVectors || !hnswMeta) { + return { ok: true, warnings }; + } + const vecDims = Number(denseVectors.dims); + const metaDims = Number(hnswMeta.dims); + if (Number.isFinite(vecDims) && Number.isFinite(metaDims) && vecDims !== metaDims) { + warnings.push(`dims mismatch (vectors=${vecDims}, meta=${metaDims})`); + } + const vecModel = typeof denseVectors.model === 'string' ? denseVectors.model : null; + const metaModel = typeof hnswMeta.model === 'string' ? hnswMeta.model : null; + if (vecModel && metaModel && vecModel !== metaModel) { + warnings.push(`model mismatch (vectors=${vecModel}, meta=${metaModel})`); + } + const vecCount = Array.isArray(denseVectors.vectors) ? 
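/*
  Editor's note: illustrative sketch for normalizeHnswConfig and
  resolveHnswPaths in src/shared/hnsw.js; it is not part of the patch. The
  index directory is a placeholder.

    import { normalizeHnswConfig, resolveHnswPaths } from './hnsw.js';

    // Unknown or malformed fields fall back to defaults; `false` disables HNSW.
    normalizeHnswConfig({ space: 'L2', m: '32' });
    // => { enabled: true, space: 'l2', m: 32, efConstruction: 200,
    //      efSearch: 64, randomSeed: 100, allowReplaceDeleted: false }

    resolveHnswPaths('index-code');
    // => { indexPath: 'index-code/dense_vectors_hnsw.bin',
    //      metaPath: 'index-code/dense_vectors_hnsw.meta.json' }
*/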
denseVectors.vectors.length : null; + const metaCount = Number(hnswMeta.count); + if (Number.isFinite(metaCount) && metaCount >= 0 && Number.isFinite(vecCount) && vecCount !== metaCount) { + warnings.push(`count mismatch (vectors=${vecCount}, meta=${metaCount})`); + } + const metaSpace = typeof hnswMeta.space === 'string' ? hnswMeta.space.trim().toLowerCase() : null; + if (metaSpace && !SPACES.has(metaSpace)) { + warnings.push(`space invalid (meta=${metaSpace})`); + } + return { ok: warnings.length === 0, warnings }; +} + +export function loadHnswIndex({ indexPath, dims, config, lib } = {}) { + const resolved = resolveIndexPath(indexPath); + if (!resolved) return null; + if (!Number.isFinite(dims) || dims <= 0) return null; + const normalized = normalizeHnswConfig(config); + if (!normalized.enabled) return null; + const resolvedLib = lib || resolveHnswLib(); + const HNSW = resolvedLib?.HierarchicalNSW || resolvedLib?.default?.HierarchicalNSW || resolvedLib?.default; + if (!HNSW) return null; + const buildIndex = () => new HNSW(normalized.space, dims); + const applyEfSearch = (index) => { + if (!normalized.efSearch) return; + try { + index.setEf(normalized.efSearch); + } catch {} + }; + const tryLoad = (candidatePath) => { + const index = buildIndex(); + index.readIndexSync(candidatePath, normalized.allowReplaceDeleted); + applyEfSearch(index); + return index; + }; + + try { + const index = tryLoad(resolved.path); + if (resolved.cleanup) cleanupBak(indexPath); + return index; + } catch (err) { + // If the primary file exists but is unreadable/corrupt, fall back to the + // backup if available. This avoids hard failures when a prior atomic + // replace left a valid .bak behind. + const primaryPath = indexPath; + const bakPath = getBakPath(indexPath); + const altPath = resolved.path === primaryPath ? bakPath : primaryPath; + if (altPath && altPath !== resolved.path && fs.existsSync(altPath)) { + try { + const index = tryLoad(altPath); + warnFallbackUsed(path.basename(altPath)); + return index; + } catch (altErr) { + warnLoadFailure(altErr?.message ? `(${altErr.message})` : ''); + return null; + } + } + warnLoadFailure(err?.message ? `(${err.message})` : ''); + return null; + } +} + +export function rankHnswIndex({ index, space }, queryEmbedding, topN, candidateSet) { + const embedding = Array.isArray(queryEmbedding) + ? queryEmbedding + : (ArrayBuffer.isView(queryEmbedding) ? Array.from(queryEmbedding) : null); + if (!index || !embedding || !embedding.length) return []; + // If a candidate set is provided but empty, the correct answer is an empty + // hit list (consistent with other rankers) rather than an unfiltered search. + if (candidateSet && typeof candidateSet.size === 'number' && candidateSet.size === 0) return []; + const requested = Math.max(1, Number(topN) || 1); + const maxElements = typeof index.getCurrentCount === 'function' + ? index.getCurrentCount() + : (typeof index.getMaxElements === 'function' + ? index.getMaxElements() + : index.maxElements); + const cap = Number.isFinite(maxElements) && maxElements > 0 + ? Math.min(requested, Math.floor(maxElements)) + : requested; + const limit = candidateSet && typeof candidateSet.size === 'number' + ? Math.max(1, Math.min(cap, candidateSet.size)) + : cap; + const filter = candidateSet && typeof candidateSet.size === 'number' + ? 
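/*
  Editor's note: illustrative end-to-end sketch for loadHnswIndex and
  rankHnswIndex in src/shared/hnsw.js; it is not part of the patch.
  `queryVector` and the index directory are placeholders.

    import { loadHnswIndex, rankHnswIndex, resolveHnswPaths } from './hnsw.js';

    const { indexPath } = resolveHnswPaths('index-code');
    const index = loadHnswIndex({ indexPath, dims: 384, config: { efSearch: 128 } });
    if (index) {
      // Restrict the search to pre-filtered labels; an empty set yields no hits.
      const hits = rankHnswIndex({ index, space: 'cosine' }, queryVector, 10, new Set([1, 5, 9]));
      // hits: [{ idx, sim }, ...] sorted by similarity, ties broken by idx
    }
*/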
(label) => candidateSet.has(label) + : undefined; + const result = index.searchKnn(embedding, limit, filter); + const distances = result?.distances || []; + const neighbors = result?.neighbors || []; + const hits = []; + for (let i = 0; i < neighbors.length; i += 1) { + const idx = neighbors[i]; + if (idx == null) continue; + const distance = distances[i]; + const sim = space === 'l2' ? -distance : 1 - distance; + hits.push({ idx, sim }); + } + return hits.sort((a, b) => (b.sim - a.sim) || (a.idx - b.idx)); +} diff --git a/src/shared/json-stream.js b/src/shared/json-stream.js new file mode 100644 index 000000000..78ea0cca7 --- /dev/null +++ b/src/shared/json-stream.js @@ -0,0 +1,274 @@ +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import { once } from 'node:events'; +import { Transform } from 'node:stream'; +import { Gzip } from 'fflate'; + +const writeChunk = async (stream, chunk) => { + if (!stream.write(chunk)) { + await once(stream, 'drain'); + } +}; + +const waitForFinish = (stream) => new Promise((resolve, reject) => { + stream.on('error', reject); + stream.on('finish', resolve); +}); + +const createTempPath = (filePath) => ( + `${filePath}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(16).slice(2, 8)}` +); + +const createFflateGzipStream = (options = {}) => { + const level = Number.isFinite(Number(options.level)) ? Math.floor(Number(options.level)) : 6; + const gzip = new Gzip({ level }); + const stream = new Transform({ + transform(chunk, encoding, callback) { + try { + const buffer = typeof chunk === 'string' ? Buffer.from(chunk, encoding) : Buffer.from(chunk); + gzip.push(buffer, false); + callback(); + } catch (err) { + callback(err); + } + }, + flush(callback) { + try { + gzip.push(new Uint8Array(0), true); + callback(); + } catch (err) { + callback(err); + } + } + }); + gzip.ondata = (chunk) => { + if (chunk && chunk.length) { + stream.push(Buffer.from(chunk)); + } + }; + return stream; +}; + +const getBakPath = (filePath) => `${filePath}.bak`; + +const replaceFile = async (tempPath, finalPath) => { + const bakPath = getBakPath(finalPath); + const finalExists = fs.existsSync(finalPath); + let backupAvailable = fs.existsSync(bakPath); + const copyFallback = async () => { + try { + await fsPromises.copyFile(tempPath, finalPath); + await fsPromises.rm(tempPath, { force: true }); + return true; + } catch { + return false; + } + }; + if (finalExists && !backupAvailable) { + try { + await fsPromises.rename(finalPath, bakPath); + backupAvailable = true; + } catch (err) { + if (err?.code !== 'ENOENT') { + backupAvailable = fs.existsSync(bakPath); + } + } + } + try { + await fsPromises.rename(tempPath, finalPath); + } catch (err) { + if (err?.code !== 'EEXIST' + && err?.code !== 'EPERM' + && err?.code !== 'ENOTEMPTY' + && err?.code !== 'EACCES' + && err?.code !== 'EXDEV') { + throw err; + } + if (!backupAvailable) { + if (await copyFallback()) return; + throw err; + } + try { + await fsPromises.rm(finalPath, { force: true }); + } catch {} + try { + await fsPromises.rename(tempPath, finalPath); + } catch (renameErr) { + if (await copyFallback()) return; + throw renameErr; + } + } +}; + +const createJsonWriteStream = (filePath, options = {}) => { + const { compression = null, atomic = false } = options; + const targetPath = atomic ? 
createTempPath(filePath) : filePath; + const fileStream = fs.createWriteStream(targetPath); + if (compression === 'gzip') { + const gzip = createFflateGzipStream(); + gzip.pipe(fileStream); + return { + stream: gzip, + done: Promise.all([waitForFinish(gzip), waitForFinish(fileStream)]) + .then(async () => { + if (atomic) { + await replaceFile(targetPath, filePath); + } + }) + .catch(async (err) => { + if (atomic) { + try { await fsPromises.rm(targetPath, { force: true }); } catch {} + } + throw err; + }) + }; + } + return { + stream: fileStream, + done: waitForFinish(fileStream) + .then(async () => { + if (atomic) { + await replaceFile(targetPath, filePath); + } + }) + .catch(async (err) => { + if (atomic) { + try { await fsPromises.rm(targetPath, { force: true }); } catch {} + } + throw err; + }) + }; +}; + +const normalizeJsonValue = (value) => { + if (value && typeof value === 'object' && typeof value.toJSON === 'function') { + try { + return value.toJSON(); + } catch { + return value; + } + } + return value; +}; + +const writeJsonValue = async (stream, value) => { + const normalized = normalizeJsonValue(value); + if (normalized === null || typeof normalized !== 'object') { + if (normalized === undefined || typeof normalized === 'function' || typeof normalized === 'symbol') { + await writeChunk(stream, 'null'); + return; + } + await writeChunk(stream, JSON.stringify(normalized)); + return; + } + if (Array.isArray(normalized)) { + await writeChunk(stream, '['); + let first = true; + for (const item of normalized) { + if (!first) await writeChunk(stream, ','); + const itemValue = normalizeJsonValue(item); + if (itemValue === undefined || typeof itemValue === 'function' || typeof itemValue === 'symbol') { + await writeChunk(stream, 'null'); + } else { + await writeJsonValue(stream, itemValue); + } + first = false; + } + await writeChunk(stream, ']'); + return; + } + await writeChunk(stream, '{'); + let first = true; + for (const [key, entry] of Object.entries(normalized)) { + const entryValue = normalizeJsonValue(entry); + if (entryValue === undefined || typeof entryValue === 'function' || typeof entryValue === 'symbol') { + continue; + } + if (!first) await writeChunk(stream, ','); + await writeChunk(stream, `${JSON.stringify(key)}:`); + await writeJsonValue(stream, entryValue); + first = false; + } + await writeChunk(stream, '}'); +}; + +const writeArrayItems = async (stream, items) => { + let first = true; + for (const item of items) { + if (!first) await writeChunk(stream, ','); + await writeJsonValue(stream, item); + first = false; + } +}; + +/** + * Stream JSON lines to disk (one JSON object per line). + * @param {string} filePath + * @param {Iterable} items + * @param {{trailingNewline?:boolean,compression?:string|null}} [options] + * @returns {Promise} + */ +export async function writeJsonLinesFile(filePath, items, options = {}) { + const { compression = null, atomic = false } = options; + const { stream, done } = createJsonWriteStream(filePath, { compression, atomic }); + for (const item of items) { + await writeJsonValue(stream, item); + await writeChunk(stream, '\n'); + } + stream.end(); + await done; +} + +/** + * Stream a JSON array to disk without holding the full string in memory. 
+ * @param {string} filePath + * @param {Iterable} items + * @param {{trailingNewline?:boolean}} [options] + * @returns {Promise} + */ +export async function writeJsonArrayFile(filePath, items, options = {}) { + const { trailingNewline = true, compression = null, atomic = false } = options; + const { stream, done } = createJsonWriteStream(filePath, { compression, atomic }); + await writeChunk(stream, '['); + await writeArrayItems(stream, items); + await writeChunk(stream, ']'); + if (trailingNewline) await writeChunk(stream, '\n'); + stream.end(); + await done; +} + +/** + * Stream a JSON object with one or more array fields to disk. + * @param {string} filePath + * @param {{fields?:object,arrays?:object,trailingNewline?:boolean}} input + * @returns {Promise} + */ +export async function writeJsonObjectFile(filePath, input = {}) { + const { + fields = {}, + arrays = {}, + trailingNewline = true, + compression = null, + atomic = false + } = input; + const { stream, done } = createJsonWriteStream(filePath, { compression, atomic }); + await writeChunk(stream, '{'); + let first = true; + for (const [key, value] of Object.entries(fields)) { + if (!first) await writeChunk(stream, ','); + await writeChunk(stream, `${JSON.stringify(key)}:`); + await writeJsonValue(stream, value); + first = false; + } + for (const [key, items] of Object.entries(arrays)) { + const header = `${JSON.stringify(key)}:[`; + await writeChunk(stream, `${first ? '' : ','}${header}`); + first = false; + await writeArrayItems(stream, items); + await writeChunk(stream, ']'); + } + await writeChunk(stream, '}'); + if (trailingNewline) await writeChunk(stream, '\n'); + stream.end(); + await done; +} diff --git a/src/shared/jsonc.js b/src/shared/jsonc.js new file mode 100644 index 000000000..04b492c45 --- /dev/null +++ b/src/shared/jsonc.js @@ -0,0 +1,26 @@ +import fs from 'node:fs'; +import { parse as parseJsonc, printParseErrorCode } from 'jsonc-parser'; + +const describeSource = (source) => (source ? ` ${source}` : ''); + +export function parseJsoncText(rawText, source = '') { + const text = typeof rawText === 'string' ? rawText : String(rawText ?? ''); + if (!text.trim()) { + throw new Error(`Failed to parse${describeSource(source)}: empty file.`); + } + const errors = []; + const parsed = parseJsonc(text, errors, { allowTrailingComma: false }); + if (errors.length) { + const first = errors[0]; + const code = typeof printParseErrorCode === 'function' + ? 
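/*
  Editor's note: illustrative sketch for the streaming JSON writers in
  src/shared/json-stream.js; it is not part of the patch. File names and the
  iterables are placeholders.

    import { writeJsonArrayFile, writeJsonObjectFile } from './json-stream.js';

    // Stream a large array to disk, gzipped, using an atomic temp-file swap.
    await writeJsonArrayFile('chunks.json.gz', chunkIterable,
      { compression: 'gzip', atomic: true });

    // Combine scalar fields with streamed array fields in a single object file.
    await writeJsonObjectFile('postings.json', {
      fields: { version: 2, mode: 'code' },
      arrays: { docs: docIterable },
      atomic: true
    });
*/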
printParseErrorCode(first.error) + : String(first.error); + throw new Error(`Failed to parse${describeSource(source)}: ${code}`); + } + return parsed; +} + +export function readJsoncFile(filePath) { + const raw = fs.readFileSync(filePath, 'utf8'); + return parseJsoncText(raw, filePath); +} diff --git a/src/shared/jsonrpc.js b/src/shared/jsonrpc.js new file mode 100644 index 000000000..3786b1889 --- /dev/null +++ b/src/shared/jsonrpc.js @@ -0,0 +1,102 @@ +import { PassThrough } from 'node:stream'; +import { StreamMessageReader, StreamMessageWriter } from 'vscode-jsonrpc'; + +const writerCache = new WeakMap(); + +const getWriterState = (outputStream) => { + let state = writerCache.get(outputStream); + if (state) return state; + const writer = new StreamMessageWriter(outputStream); + state = { writer, closed: false, queue: Promise.resolve() }; + const markClosed = () => { + state.closed = true; + }; + if (typeof outputStream.once === 'function') { + outputStream.once('close', markClosed); + outputStream.once('finish', markClosed); + outputStream.once('error', markClosed); + } + writerCache.set(outputStream, state); + return state; +}; + +/** + * Get a JSON-RPC writer bound to a specific stream with serialized writes. + * @param {import('node:stream').Writable} outputStream + * @returns {{write:(payload:object)=>Promise,close:()=>void}} + */ +export function getJsonRpcWriter(outputStream) { + if (!outputStream || typeof outputStream.write !== 'function') { + throw new Error('getJsonRpcWriter requires a writable stream.'); + } + const state = getWriterState(outputStream); + const write = (payload) => { + const run = async () => { + if (state.closed || outputStream.destroyed || outputStream.writableEnded) { + throw new Error('JSON-RPC stream closed.'); + } + return state.writer.write(payload); + }; + state.queue = state.queue.then(run, run); + return state.queue.catch((err) => { + if (err?.code === 'ERR_STREAM_DESTROYED') { + state.closed = true; + } + throw err; + }); + }; + const close = () => { + state.closed = true; + state.writer.dispose?.(); + writerCache.delete(outputStream); + }; + return { write, close }; +} + +/** + * Close and dispose a cached JSON-RPC writer for a stream. + * @param {import('node:stream').Writable} outputStream + */ +export function closeJsonRpcWriter(outputStream) { + const state = writerCache.get(outputStream); + if (!state) return; + state.closed = true; + state.writer.dispose?.(); + writerCache.delete(outputStream); +} + +/** + * Write a JSON-RPC message with Content-Length framing. + * @param {import('node:stream').Writable} outputStream + * @param {object} payload + * @returns {Promise|void} + */ +export function writeFramedJsonRpc(outputStream, payload) { + return getJsonRpcWriter(outputStream).write(payload); +} + +/** + * Create a framed JSON-RPC parser for Content-Length-delimited payloads. + * @param {{onMessage?:(msg:object)=>void,onError?:(err:Error)=>void,maxBufferBytes?:number}} input + * @returns {{push:(chunk:Buffer|string)=>void,dispose:()=>void}} + */ +export function createFramedJsonRpcParser({ onMessage, onError } = {}) { + const stream = new PassThrough(); + const reader = new StreamMessageReader(stream); + const handleMessage = typeof onMessage === 'function' ? onMessage : () => {}; + const handleError = typeof onError === 'function' ? onError : () => {}; + + reader.onError(handleError); + reader.listen(handleMessage); + + return { + push(chunk) { + if (!chunk || chunk.length === 0) return; + stream.write(Buffer.isBuffer(chunk) ? 
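/*
  Editor's note: illustrative sketch for the JSON-RPC helpers in
  src/shared/jsonrpc.js; it is not part of the patch.

    import { getJsonRpcWriter, createFramedJsonRpcParser } from './jsonrpc.js';

    // Writes on a given stream are serialized and framed with Content-Length.
    const writer = getJsonRpcWriter(process.stdout);
    await writer.write({ jsonrpc: '2.0', id: 1, method: 'ping' });

    // Feed raw chunks to the parser; whole messages arrive via onMessage.
    const parser = createFramedJsonRpcParser({
      onMessage: (msg) => console.error('received', msg.method),
      onError: (err) => console.error(err)
    });
    process.stdin.on('data', (chunk) => parser.push(chunk));
*/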
chunk : Buffer.from(chunk)); + }, + dispose() { + reader.dispose(); + stream.end(); + } + }; +} diff --git a/src/shared/lancedb.js b/src/shared/lancedb.js new file mode 100644 index 000000000..63974e967 --- /dev/null +++ b/src/shared/lancedb.js @@ -0,0 +1,65 @@ +import path from 'node:path'; + +const METRICS = new Set(['cosine', 'l2', 'dot']); + +const normalizeText = (value, fallback) => { + if (typeof value !== 'string') return fallback; + const trimmed = value.trim(); + return trimmed ? trimmed : fallback; +}; + +const normalizeInt = (value, fallback) => { + const parsed = Number(value); + if (!Number.isFinite(parsed) || parsed <= 0) return fallback; + return Math.floor(parsed); +}; + +const normalizeMetric = (value) => { + if (typeof value !== 'string') return 'cosine'; + const trimmed = value.trim().toLowerCase(); + if (trimmed === 'ip') return 'dot'; + return METRICS.has(trimmed) ? trimmed : 'cosine'; +}; + +export function normalizeLanceDbConfig(raw = {}) { + if (raw === false) return { enabled: false }; + const config = raw && typeof raw === 'object' ? raw : {}; + return { + enabled: config.enabled !== false, + table: normalizeText(config.table, 'vectors'), + embeddingColumn: normalizeText(config.embeddingColumn, 'vector'), + idColumn: normalizeText(config.idColumn, 'id'), + metric: normalizeMetric(config.metric), + batchSize: normalizeInt(config.batchSize, 1024) + }; +} + +export function resolveLanceDbPaths(indexDir) { + return { + merged: { + dir: path.join(indexDir, 'dense_vectors.lancedb'), + metaPath: path.join(indexDir, 'dense_vectors.lancedb.meta.json') + }, + doc: { + dir: path.join(indexDir, 'dense_vectors_doc.lancedb'), + metaPath: path.join(indexDir, 'dense_vectors_doc.lancedb.meta.json') + }, + code: { + dir: path.join(indexDir, 'dense_vectors_code.lancedb'), + metaPath: path.join(indexDir, 'dense_vectors_code.lancedb.meta.json') + } + }; +} + +export function resolveLanceDbTarget(mode, denseVectorMode) { + const resolved = typeof denseVectorMode === 'string' + ? denseVectorMode.trim().toLowerCase() + : ''; + if (resolved === 'code') return 'code'; + if (resolved === 'doc') return 'doc'; + if (resolved === 'auto') { + if (mode === 'code') return 'code'; + if (mode === 'prose' || mode === 'extracted-prose') return 'doc'; + } + return 'merged'; +} diff --git a/src/shared/metrics.js b/src/shared/metrics.js new file mode 100644 index 000000000..235752663 --- /dev/null +++ b/src/shared/metrics.js @@ -0,0 +1,271 @@ +import { Counter, Gauge, Histogram, Registry } from 'prom-client'; + +const registry = new Registry(); +let initialized = false; +let metrics = null; + +const normalizeString = (value) => ( + typeof value === 'string' ? value.trim().toLowerCase() : '' +); + +const normalizeLabel = (value, allowed, fallback = 'unknown') => { + const normalized = normalizeString(value); + if (!normalized) return fallback; + return allowed.has(normalized) ? 
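/*
  Editor's note: illustrative sketch for the LanceDB config helpers in
  src/shared/lancedb.js; it is not part of the patch.

    import { normalizeLanceDbConfig, resolveLanceDbTarget } from './lancedb.js';

    normalizeLanceDbConfig({ metric: 'IP', batchSize: '256' });
    // => { enabled: true, table: 'vectors', embeddingColumn: 'vector',
    //      idColumn: 'id', metric: 'dot', batchSize: 256 }

    // 'auto' maps prose modes to the doc table and code mode to the code table.
    resolveLanceDbTarget('prose', 'auto');   // => 'doc'
    resolveLanceDbTarget('code', 'merged');  // => 'merged'
*/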
normalized : fallback; +}; + +const STAGES = new Set(['stage1', 'stage2', 'stage3', 'stage4', 'unknown']); +const MODES = new Set(['code', 'prose', 'all', 'records', 'extracted-prose', 'unknown']); +const BACKENDS = new Set(['memory', 'sqlite', 'sqlite-fts', 'unknown']); +const STATUSES = new Set(['ok', 'error', 'unknown']); +const ANN = new Set(['on', 'off', 'unknown']); +const POOLS = new Set(['tokenize', 'quantize', 'watch', 'unknown']); +const TASKS = new Set(['tokenize', 'quantize', 'unknown']); +const WATCH_EVENTS = new Set(['add', 'change', 'unlink', 'error', 'unknown']); +const DEBOUNCE = new Set(['scheduled', 'fired', 'canceled', 'unknown']); +const CACHES = new Set(['query', 'embedding', 'output', 'unknown']); +const CACHE_RESULTS = new Set(['hit', 'miss', 'unknown']); +const SURFACES = new Set(['cli', 'api', 'mcp', 'search', 'index', 'unknown']); +const FALLBACKS = new Set(['backend', 'vector-candidates', 'unknown']); +const TIMEOUTS = new Set(['tool', 'search', 'index', 'unknown']); + +const normalizeStage = (value) => normalizeLabel(value, STAGES); +const normalizeMode = (value) => normalizeLabel(value, MODES); +const normalizeBackend = (value) => normalizeLabel(value, BACKENDS); +const normalizeStatus = (value) => normalizeLabel(value, STATUSES); +const normalizePool = (value) => normalizeLabel(value, POOLS); +const normalizeTask = (value) => normalizeLabel(value, TASKS); +const normalizeWatchEvent = (value) => normalizeLabel(value, WATCH_EVENTS); +const normalizeDebounce = (value) => normalizeLabel(value, DEBOUNCE); +const normalizeCache = (value) => normalizeLabel(value, CACHES); +const normalizeCacheResult = (value) => normalizeLabel(value, CACHE_RESULTS); +const normalizeSurface = (value) => normalizeLabel(value, SURFACES); +const normalizeFallback = (value) => normalizeLabel(value, FALLBACKS); +const normalizeTimeout = (value) => normalizeLabel(value, TIMEOUTS); +const normalizeAnn = (value) => { + if (value === true || value === 'on') return 'on'; + if (value === false || value === 'off') return 'off'; + return normalizeLabel(value, ANN); +}; + +const ensureMetrics = () => { + if (initialized) return; + metrics = { + indexDuration: new Histogram({ + name: 'pairofcleats_index_duration_seconds', + help: 'Index build duration in seconds.', + labelNames: ['stage', 'mode', 'status'], + buckets: [0.1, 0.5, 1, 2, 5, 10, 30, 60, 120, 300, 600, 1200, 3600], + registers: [registry] + }), + searchDuration: new Histogram({ + name: 'pairofcleats_search_duration_seconds', + help: 'Search duration in seconds.', + labelNames: ['mode', 'backend', 'ann', 'status'], + buckets: [0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 60], + registers: [registry] + }), + indexRuns: new Counter({ + name: 'pairofcleats_index_runs_total', + help: 'Count of index runs.', + labelNames: ['stage', 'mode', 'status'], + registers: [registry] + }), + searchRuns: new Counter({ + name: 'pairofcleats_search_runs_total', + help: 'Count of search runs.', + labelNames: ['mode', 'backend', 'ann', 'status'], + registers: [registry] + }), + workerQueueDepth: new Gauge({ + name: 'pairofcleats_worker_queue_depth', + help: 'Worker pool queue depth.', + labelNames: ['pool'], + registers: [registry] + }), + workerActiveTasks: new Gauge({ + name: 'pairofcleats_worker_active_tasks', + help: 'Active worker pool tasks.', + labelNames: ['pool'], + registers: [registry] + }), + workerTaskDuration: new Histogram({ + name: 'pairofcleats_worker_task_duration_seconds', + help: 'Worker task duration in seconds.', + 
labelNames: ['pool', 'task', 'worker', 'status'], + buckets: [0.001, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10], + registers: [registry] + }), + workerRetries: new Counter({ + name: 'pairofcleats_worker_retries_total', + help: 'Worker pool restart attempts.', + labelNames: ['pool'], + registers: [registry] + }), + watchBacklog: new Gauge({ + name: 'pairofcleats_watch_backlog', + help: 'Pending watch backlog size.', + labelNames: ['pool'], + registers: [registry] + }), + watchEvents: new Counter({ + name: 'pairofcleats_watch_events_total', + help: 'Total watch events observed.', + labelNames: ['event'], + registers: [registry] + }), + watchDebounce: new Counter({ + name: 'pairofcleats_watch_debounce_total', + help: 'Watch debounce schedule events.', + labelNames: ['type'], + registers: [registry] + }), + watchBuildDuration: new Histogram({ + name: 'pairofcleats_watch_build_duration_seconds', + help: 'Watch-triggered build duration in seconds.', + labelNames: ['status'], + buckets: [0.01, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10, 30, 60, 120, 300], + registers: [registry] + }), + watchBursts: new Counter({ + name: 'pairofcleats_watch_bursts_total', + help: 'Detected watch event bursts.', + labelNames: ['pool'], + registers: [registry] + }), + cacheEvents: new Counter({ + name: 'pairofcleats_cache_events_total', + help: 'Cache hit/miss events.', + labelNames: ['cache', 'result'], + registers: [registry] + }), + fallbacks: new Counter({ + name: 'pairofcleats_fallbacks_total', + help: 'Fallback events by surface.', + labelNames: ['surface', 'reason'], + registers: [registry] + }), + timeouts: new Counter({ + name: 'pairofcleats_timeouts_total', + help: 'Timeout events by surface.', + labelNames: ['surface', 'operation'], + registers: [registry] + }) + }; + initialized = true; +}; + +const normalizeSeconds = (value) => { + const parsed = Number(value); + if (!Number.isFinite(parsed) || parsed < 0) return 0; + return parsed; +}; + +export function observeIndexDuration({ stage, mode, status, seconds }) { + ensureMetrics(); + const labels = { + stage: normalizeStage(stage), + mode: normalizeMode(mode), + status: normalizeStatus(status) + }; + const duration = normalizeSeconds(seconds); + metrics.indexDuration.observe(labels, duration); + metrics.indexRuns.inc(labels); +} + +export function observeSearchDuration({ mode, backend, ann, status, seconds }) { + ensureMetrics(); + const labels = { + mode: normalizeMode(mode), + backend: normalizeBackend(backend), + ann: normalizeAnn(ann), + status: normalizeStatus(status) + }; + const duration = normalizeSeconds(seconds); + metrics.searchDuration.observe(labels, duration); + metrics.searchRuns.inc(labels); +} + +export function setWorkerQueueDepth({ pool, value }) { + ensureMetrics(); + metrics.workerQueueDepth.set({ pool: normalizePool(pool) }, Number(value) || 0); +} + +export function setWorkerActiveTasks({ pool, value }) { + ensureMetrics(); + metrics.workerActiveTasks.set({ pool: normalizePool(pool) }, Number(value) || 0); +} + +export function observeWorkerTaskDuration({ pool, task, worker, status, seconds }) { + ensureMetrics(); + metrics.workerTaskDuration.observe({ + pool: normalizePool(pool), + task: normalizeTask(task), + worker: worker ? 
String(worker) : 'unknown', + status: normalizeStatus(status) + }, normalizeSeconds(seconds)); +} + +export function incWorkerRetries({ pool }) { + ensureMetrics(); + metrics.workerRetries.inc({ pool: normalizePool(pool) }); +} + +export function setWatchBacklog(value) { + ensureMetrics(); + metrics.watchBacklog.set({ pool: 'watch' }, Number(value) || 0); +} + +export function incWatchEvent(eventType) { + ensureMetrics(); + metrics.watchEvents.inc({ event: normalizeWatchEvent(eventType) }); +} + +export function incWatchDebounce(type) { + ensureMetrics(); + metrics.watchDebounce.inc({ type: normalizeDebounce(type) }); +} + +export function observeWatchBuildDuration({ status, seconds }) { + ensureMetrics(); + metrics.watchBuildDuration.observe({ status: normalizeStatus(status) }, normalizeSeconds(seconds)); +} + +export function incWatchBurst() { + ensureMetrics(); + metrics.watchBursts.inc({ pool: 'watch' }); +} + +export function incCacheEvent({ cache, result }) { + ensureMetrics(); + metrics.cacheEvents.inc({ + cache: normalizeCache(cache), + result: normalizeCacheResult(result) + }); +} + +export function incFallback({ surface, reason }) { + ensureMetrics(); + metrics.fallbacks.inc({ + surface: normalizeSurface(surface), + reason: normalizeFallback(reason) + }); +} + +export function incTimeout({ surface, operation }) { + ensureMetrics(); + metrics.timeouts.inc({ + surface: normalizeSurface(surface), + operation: normalizeTimeout(operation) + }); +} + +export function getMetricsRegistry() { + ensureMetrics(); + return registry; +} + +export async function getMetricsText() { + ensureMetrics(); + return registry.metrics(); +} diff --git a/src/shared/onnx-embeddings.js b/src/shared/onnx-embeddings.js new file mode 100644 index 000000000..6f5715bdf --- /dev/null +++ b/src/shared/onnx-embeddings.js @@ -0,0 +1,334 @@ +import fs from 'node:fs'; +import path from 'node:path'; + +const GRAPH_LEVELS = new Set(['disabled', 'basic', 'extended', 'all']); +const PROVIDER_ALIASES = new Map([ + ['onnx', 'onnx'], + ['onnxruntime', 'onnx'], + ['onnxruntime-node', 'onnx'], + ['xenova', 'xenova'], + ['transformers', 'xenova'] +]); + +const normalizeProvider = (value) => { + // Default to Xenova/Transformers when unset; reject unknown values. + if (typeof value !== 'string') return 'xenova'; + const trimmed = value.trim().toLowerCase(); + if (!trimmed) return 'xenova'; + const resolved = PROVIDER_ALIASES.get(trimmed); + if (!resolved) { + const supported = Array.from(new Set(PROVIDER_ALIASES.values())).sort(); + throw new Error( + `[embeddings] Unknown embedding provider: ${JSON.stringify(value)}. ` + + `Expected one of: ${supported.join(', ')}.` + ); + } + return resolved; +}; + +const normalizeProviders = (value) => { + if (Array.isArray(value)) { + return value.map((entry) => String(entry).trim()).filter(Boolean); + } + if (typeof value === 'string') { + return value.split(',').map((entry) => entry.trim()).filter(Boolean); + } + return null; +}; + +const normalizeThread = (value) => { + const parsed = Number(value); + if (!Number.isFinite(parsed) || parsed <= 0) return null; + return Math.floor(parsed); +}; + +const normalizeGraphLevel = (value) => { + if (typeof value !== 'string') return null; + const normalized = value.trim().toLowerCase(); + return GRAPH_LEVELS.has(normalized) ? 
normalized : null; +}; + +const LARGE_MODEL_BYTES = Math.floor(1.5 * 1024 * 1024 * 1024); + +const statSize = (filePath) => { + try { + return fs.statSync(filePath).size; + } catch { + return null; + } +}; + +export function normalizeOnnxConfig(raw = {}) { + const config = raw && typeof raw === 'object' ? raw : {}; + const executionProviders = normalizeProviders(config.executionProviders); + return { + modelPath: typeof config.modelPath === 'string' ? config.modelPath.trim() : '', + tokenizerId: typeof config.tokenizerId === 'string' ? config.tokenizerId.trim() : '', + executionProviders: executionProviders && executionProviders.length ? executionProviders : null, + intraOpNumThreads: normalizeThread(config.intraOpNumThreads), + interOpNumThreads: normalizeThread(config.interOpNumThreads), + graphOptimizationLevel: normalizeGraphLevel(config.graphOptimizationLevel) + }; +} + +export function normalizeEmbeddingProvider(raw) { + return normalizeProvider(raw); +} + +export function resolveOnnxModelPath({ rootDir, modelPath, modelsDir, modelId }) { + const root = rootDir ? path.resolve(rootDir) : process.cwd(); + const trimmed = typeof modelPath === 'string' ? modelPath.trim() : ''; + const tryPath = (candidate) => { + if (!candidate || !fs.existsSync(candidate)) return null; + try { + const stat = fs.statSync(candidate); + if (stat.isDirectory()) { + const nested = [ + path.join(candidate, 'model.onnx'), + path.join(candidate, 'model_quantized.onnx'), + path.join(candidate, 'onnx', 'model.onnx'), + path.join(candidate, 'onnx', 'model_quantized.onnx') + ]; + for (const entry of nested) { + if (fs.existsSync(entry)) return entry; + } + return null; + } + } catch { + return null; + } + return candidate; + }; + if (trimmed) { + const resolved = path.isAbsolute(trimmed) ? trimmed : path.join(root, trimmed); + const stat = tryPath(resolved); + if (stat) return stat; + } + const modelRoot = modelId && modelsDir ? path.join(modelsDir, modelId) : null; + const candidates = [ + modelRoot ? path.join(modelRoot, 'onnx', 'model.onnx') : null, + modelRoot ? path.join(modelRoot, 'onnx', 'model_quantized.onnx') : null, + modelRoot ? path.join(modelRoot, 'model.onnx') : null, + modelRoot ? path.join(modelRoot, 'model_quantized.onnx') : null + ]; + for (const candidate of candidates) { + const resolved = tryPath(candidate); + if (resolved) return resolved; + } + return null; +} + +const onnxCache = new Map(); + +const normalizeVec = (vec) => { + if (!Array.isArray(vec) || vec.length === 0) return vec || []; + const norm = Math.sqrt(vec.reduce((sum, v) => sum + v * v, 0)); + if (!Number.isFinite(norm) || norm === 0) return vec; + return vec.map((v) => v / norm); +}; + +const normalizeExecutionProviders = (providers, lowMemory) => { + if (!providers || !lowMemory) return providers; + return providers.map((entry) => { + if (typeof entry === 'string') { + return entry === 'cpu' ? 
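/*
  Editor's note: illustrative sketch for normalizeOnnxConfig and
  resolveOnnxModelPath in src/shared/onnx-embeddings.js; it is not part of the
  patch. Directory and model names are placeholders.

    import { normalizeOnnxConfig, resolveOnnxModelPath } from './onnx-embeddings.js';

    const onnx = normalizeOnnxConfig({ executionProviders: 'cpu,cuda', intraOpNumThreads: '4' });
    // => executionProviders: ['cpu', 'cuda'], intraOpNumThreads: 4, ...

    // Checks the explicit modelPath first (file or directory), then
    // <modelsDir>/<modelId>/ for model.onnx or model_quantized.onnx.
    const modelPath = resolveOnnxModelPath({
      rootDir: process.cwd(),
      modelsDir: './models',
      modelId: 'Xenova/all-MiniLM-L6-v2'
    });
*/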
{ name: 'cpu', useArena: false } : entry; + } + if (entry && entry.name === 'cpu' && entry.useArena === undefined) { + return { ...entry, useArena: false }; + } + return entry; + }); +}; + +const buildSessionOptions = (config, { lowMemory = false } = {}) => { + const options = {}; + const providers = normalizeExecutionProviders(config.executionProviders, lowMemory); + if (providers && providers.length) { + options.executionProviders = providers; + } else if (lowMemory) { + options.executionProviders = [{ name: 'cpu', useArena: false }]; + } + if (config.intraOpNumThreads) options.intraOpNumThreads = config.intraOpNumThreads; + if (config.interOpNumThreads) options.interOpNumThreads = config.interOpNumThreads; + if (config.graphOptimizationLevel) { + options.graphOptimizationLevel = config.graphOptimizationLevel; + } else if (lowMemory) { + options.graphOptimizationLevel = 'basic'; + } + if (lowMemory) { + options.enableCpuMemArena = false; + options.enableMemPattern = false; + options.executionMode = 'sequential'; + } + return Object.keys(options).length ? options : undefined; +}; + +const flatten = (nested) => nested.flatMap((row) => row.map((value) => BigInt(value))); + +const toTensor = (TensorCtor, rows) => { + const batch = rows.length; + const width = rows[0]?.length || 0; + if (!batch || !width) return null; + const data = BigInt64Array.from(flatten(rows)); + return new TensorCtor('int64', data, [batch, width]); +}; + +const buildFeeds = (session, encoded, TensorCtor) => { + const inputNames = Array.isArray(session.inputNames) ? session.inputNames : []; + const feeds = {}; + const inputs = { + input_ids: encoded.input_ids, + attention_mask: encoded.attention_mask, + token_type_ids: encoded.token_type_ids + }; + for (const name of inputNames) { + const values = inputs[name]; + if (!values) continue; + const tensor = toTensor(TensorCtor, values); + if (tensor) feeds[name] = tensor; + } + return feeds; +}; + +const findOutput = (outputs) => { + if (!outputs) return null; + const preferred = [ + 'sentence_embedding', + 'embeddings', + 'pooler_output', + 'last_hidden_state', + 'output_0' + ]; + for (const key of preferred) { + if (outputs[key]) return outputs[key]; + } + const fallbackKey = Object.keys(outputs)[0]; + return fallbackKey ? outputs[fallbackKey] : null; +}; + +const meanPool = (tensor, attentionMask) => { + const dims = tensor?.dims || []; + if (dims.length !== 3) return []; + const [batch, seq, hidden] = dims; + const data = tensor.data || []; + const flatMask = attentionMask ? attentionMask.flat() : new Array(batch * seq).fill(1); + const output = new Array(batch).fill(null); + for (let b = 0; b < batch; b += 1) { + const vec = new Array(hidden).fill(0); + let count = 0; + for (let t = 0; t < seq; t += 1) { + const maskVal = Number(flatMask[b * seq + t] ?? 
0); + if (!maskVal) continue; + count += 1; + const offset = (b * seq + t) * hidden; + for (let h = 0; h < hidden; h += 1) { + vec[h] += data[offset + h]; + } + } + if (count > 0) { + for (let h = 0; h < hidden; h += 1) { + vec[h] = vec[h] / count; + } + } + output[b] = normalizeVec(vec); + } + return output; +}; + +const rowsFromTensor = (tensor) => { + const dims = tensor?.dims || []; + if (dims.length !== 2) return []; + const [rows, cols] = dims; + const data = tensor.data || []; + const out = new Array(rows); + for (let r = 0; r < rows; r += 1) { + const start = r * cols; + out[r] = normalizeVec(Array.from(data.slice(start, start + cols))); + } + return out; +}; + +export function createOnnxEmbedder({ rootDir, modelId, modelsDir, onnxConfig }) { + const normalized = normalizeOnnxConfig(onnxConfig); + const resolvedModelPath = resolveOnnxModelPath({ + rootDir, + modelPath: normalized.modelPath, + modelsDir, + modelId + }); + if (!resolvedModelPath) { + const hint = modelId ? ` (modelId=${JSON.stringify(modelId)})` : ''; + throw new Error( + `ONNX model path not found${hint}. ` + + 'Set indexing.embeddings.onnx.modelPath or run "npm run download-models".' + ); + } + const modelSize = statSize(resolvedModelPath); + const lowMemory = Number.isFinite(modelSize) && modelSize >= LARGE_MODEL_BYTES; + const tokenizerId = normalized.tokenizerId || modelId; + const cacheKey = JSON.stringify({ + resolvedModelPath, + tokenizerId, + executionProviders: normalized.executionProviders || null, + lowMemory, + intraOpNumThreads: normalized.intraOpNumThreads || null, + interOpNumThreads: normalized.interOpNumThreads || null, + graphOptimizationLevel: normalized.graphOptimizationLevel || null + }); + if (!onnxCache.has(cacheKey)) { + const sessionOptions = buildSessionOptions(normalized, { lowMemory }); + const promise = (async () => { + const { AutoTokenizer, env } = await import('@xenova/transformers'); + if (modelsDir) { + env.cacheDir = modelsDir; + } + const tokenizer = await AutoTokenizer.from_pretrained(tokenizerId); + const { InferenceSession, Tensor } = await import('onnxruntime-node'); + let session; + try { + session = await InferenceSession.create(resolvedModelPath, sessionOptions); + } catch (err) { + if (!lowMemory) { + const fallbackOptions = buildSessionOptions(normalized, { lowMemory: true }); + session = await InferenceSession.create(resolvedModelPath, fallbackOptions); + } else { + throw err; + } + } + return { tokenizer, session, Tensor }; + })(); + onnxCache.set(cacheKey, promise); + } + const embedderPromise = onnxCache.get(cacheKey); + const getEmbeddings = async (texts) => { + const list = Array.isArray(texts) ? 
texts : []; + if (!list.length) return []; + const { tokenizer, session, Tensor } = await embedderPromise; + const wantsTokenTypeIds = Array.isArray(session.inputNames) + && session.inputNames.includes('token_type_ids'); + const encoded = tokenizer(list, { + padding: true, + truncation: true, + return_tensor: false, + return_token_type_ids: wantsTokenTypeIds + }); + const feeds = buildFeeds(session, encoded, Tensor); + if (!Object.keys(feeds).length) return Array.from({ length: list.length }, () => []); + const outputs = await session.run(feeds); + const mainOutput = findOutput(outputs); + if (!mainOutput) return Array.from({ length: list.length }, () => []); + if (Array.isArray(mainOutput?.dims) && mainOutput.dims.length === 2) { + return rowsFromTensor(mainOutput); + } + const mask = encoded.attention_mask; + return meanPool(mainOutput, mask); + }; + return { + embedderPromise, + getEmbeddings, + getEmbedding: async (text) => { + const list = await getEmbeddings([text]); + return list[0] || []; + } + }; +} diff --git a/src/shared/optional-deps.js b/src/shared/optional-deps.js new file mode 100644 index 000000000..149fc895c --- /dev/null +++ b/src/shared/optional-deps.js @@ -0,0 +1,48 @@ +import { createRequire } from 'node:module'; + +const require = createRequire(import.meta.url); +const TRUE_VALUES = new Set(['1', 'true', 'yes', 'on']); + +const isVerbose = (options = {}) => { + if (options.verbose === true) return true; + const raw = String(process.env.PAIROFCLEATS_VERBOSE || '').trim().toLowerCase(); + return TRUE_VALUES.has(raw); +}; + +const normalizeErrorReason = (err) => { + const code = err?.code; + if (code === 'MODULE_NOT_FOUND' || code === 'ERR_MODULE_NOT_FOUND') { + return 'missing'; + } + if (code === 'ERR_REQUIRE_ESM') return 'unsupported'; + return 'error'; +}; + +const maybeLog = (message, err, options = {}) => { + if (!isVerbose(options)) return; + const logger = typeof options.logger === 'function' ? options.logger : console.warn; + const detail = err?.message ? ` (${err.message})` : ''; + logger(`[deps] ${message}${detail}`); +}; + +export function tryRequire(name, options = {}) { + try { + const mod = require(name); + return { ok: true, mod }; + } catch (err) { + const reason = normalizeErrorReason(err); + maybeLog(`Optional dependency unavailable: ${name}`, err, options); + return { ok: false, error: err, reason }; + } +} + +export async function tryImport(name, options = {}) { + try { + const mod = await import(name); + return { ok: true, mod }; + } catch (err) { + const reason = normalizeErrorReason(err); + maybeLog(`Optional dependency unavailable: ${name}`, err, options); + return { ok: false, error: err, reason }; + } +} diff --git a/src/shared/postings-config.js b/src/shared/postings-config.js index b439012ec..16ee2d647 100644 --- a/src/shared/postings-config.js +++ b/src/shared/postings-config.js @@ -7,13 +7,23 @@ * phraseMinN:number, * phraseMaxN:number, * chargramMinN:number, - * chargramMaxN:number + * chargramMaxN:number, + * chargramMaxTokenLength:number|null, + * chargramSource:string, + * fielded:boolean * }} */ export function normalizePostingsConfig(input = {}) { const cfg = input && typeof input === 'object' ? input : {}; const enablePhraseNgrams = cfg.enablePhraseNgrams !== false; const enableChargrams = cfg.enableChargrams !== false; + const fielded = cfg.fielded !== false; + const chargramSourceRaw = typeof cfg.chargramSource === 'string' + ? 
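/*
  Editor's note: illustrative sketch for normalizePostingsConfig in
  src/shared/postings-config.js; it is not part of the patch.

    import { normalizePostingsConfig } from './postings-config.js';

    normalizePostingsConfig({ chargramSource: 'FULL', chargramMaxTokenLength: 0 });
    // => { enablePhraseNgrams: true, enableChargrams: true,
    //      phraseMinN: 2, phraseMaxN: 4, chargramMinN: 3, chargramMaxN: 5,
    //      chargramMaxTokenLength: null,   // 0 or false removes the length cap
    //      chargramSource: 'full', fielded: true }
*/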
cfg.chargramSource.trim().toLowerCase() + : ''; + const chargramSource = ['full', 'fields'].includes(chargramSourceRaw) + ? chargramSourceRaw + : 'fields'; const toInt = (value) => { const num = Number(value); @@ -31,6 +41,15 @@ export function normalizePostingsConfig(input = {}) { const phraseRange = normalizeRange(cfg.phraseMinN, cfg.phraseMaxN, { min: 2, max: 4 }); const chargramRange = normalizeRange(cfg.chargramMinN, cfg.chargramMaxN, { min: 3, max: 5 }); + let chargramMaxTokenLength = 48; + if (cfg.chargramMaxTokenLength === 0 || cfg.chargramMaxTokenLength === false) { + chargramMaxTokenLength = null; + } else { + const maxTokenRaw = Number(cfg.chargramMaxTokenLength); + if (Number.isFinite(maxTokenRaw)) { + chargramMaxTokenLength = Math.max(2, Math.floor(maxTokenRaw)); + } + } return { enablePhraseNgrams, @@ -38,6 +57,9 @@ export function normalizePostingsConfig(input = {}) { phraseMinN: phraseRange.min, phraseMaxN: phraseRange.max, chargramMinN: chargramRange.min, - chargramMaxN: chargramRange.max + chargramMaxN: chargramRange.max, + chargramMaxTokenLength, + chargramSource, + fielded }; } diff --git a/src/shared/progress.js b/src/shared/progress.js index f6428d700..68a12d974 100644 --- a/src/shared/progress.js +++ b/src/shared/progress.js @@ -1,19 +1,196 @@ +import pino from 'pino'; + /** * Write a simple progress line to stderr. * @param {string} step * @param {number} i * @param {number} total */ +let lastProgressActive = false; +let lastProgressWidth = 0; +let logger = null; +let structuredEnabled = false; +let logContext = {}; +let ringMax = 200; +let ringMaxBytes = 2 * 1024 * 1024; +const ringEvents = []; +const ringSizes = []; +let ringBytes = 0; +const defaultRedactPaths = [ + 'password', + 'token', + 'secret', + 'apiKey', + 'authorization', + 'headers.authorization', + 'headers.cookie', + 'headers.set-cookie', + 'auth', + 'credentials' +]; + +const normalizeRedact = (value) => { + if (value === false) return null; + if (Array.isArray(value)) { + return value.length ? { paths: value, censor: '[redacted]' } : null; + } + if (value && typeof value === 'object') { + const paths = Array.isArray(value.paths) ? value.paths : []; + const censor = typeof value.censor === 'string' ? value.censor : '[redacted]'; + const remove = value.remove === true; + return paths.length ? { paths, censor, remove } : null; + } + return { paths: defaultRedactPaths, censor: '[redacted]' }; +}; + +const recordEvent = (level, msg, meta) => { + const payload = { + ts: new Date().toISOString(), + level, + msg, + meta: meta && typeof meta === 'object' ? meta : null + }; + let encoded = ''; + try { + encoded = JSON.stringify(payload); + } catch { + encoded = '{"ts":"[unserializable]","level":"error","msg":"[unserializable]"}'; + } + const size = Buffer.byteLength(encoded, 'utf8'); + ringEvents.push(payload); + ringSizes.push(size); + ringBytes += size; + while (ringEvents.length > ringMax || ringBytes > ringMaxBytes) { + ringBytes -= ringSizes.shift() || 0; + ringEvents.shift(); + } +}; + +export function configureLogger(options = {}) { + const enabled = options.enabled === true; + structuredEnabled = enabled; + if (!enabled) { + logger = null; + logContext = options.context && typeof options.context === 'object' + ? 
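
// Illustrative call showing how normalizePostingsConfig (above) coerces its
// inputs; the import path is assumed relative to the repo root.
import { normalizePostingsConfig } from './src/shared/postings-config.js';

const postings = normalizePostingsConfig({
  chargramMaxTokenLength: 0, // 0 or false disables the cap
  chargramSource: 'FULL'     // trimmed/lowercased; unknown values fall back to 'fields'
});
// postings.chargramMaxTokenLength === null
// postings.chargramSource === 'full'
// postings.fielded === true (defaults on unless explicitly set to false)
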
{ ...options.context }
+    : {};
+    return;
+  }
+  if (Number.isFinite(Number(options.ringMax))) {
+    ringMax = Math.max(1, Math.floor(Number(options.ringMax)));
+  }
+  if (Number.isFinite(Number(options.ringMaxBytes))) {
+    ringMaxBytes = Math.max(1024, Math.floor(Number(options.ringMaxBytes)));
+  }
+  const level = typeof options.level === 'string' && options.level.trim()
+    ? options.level.trim().toLowerCase()
+    : 'info';
+  const redact = normalizeRedact(options.redact);
+  const transport = options.pretty
+    ? {
+      target: 'pino-pretty',
+      options: { colorize: true, translateTime: 'SYS:standard' }
+    }
+    : undefined;
+  // Pass the transport descriptor inside the options object; pino's second
+  // positional argument must be a destination stream, not a transport config.
+  logger = pino({
+    level,
+    base: null,
+    timestamp: pino.stdTimeFunctions.isoTime,
+    ...(redact ? { redact } : {}),
+    ...(transport ? { transport } : {})
+  });
+  logContext = options.context && typeof options.context === 'object'
+    ? { ...options.context }
+    : {};
+}
+
+export function updateLogContext(context = {}) {
+  if (!context || typeof context !== 'object') return;
+  logContext = { ...logContext, ...context };
+}
+
+export function getRecentLogEvents() {
+  return ringEvents.slice();
+}
+
+export function isStructuredLogging() {
+  return structuredEnabled;
+}
+
+function clearProgressLine() {
+  if (!lastProgressActive || !process.stderr.isTTY) return;
+  const width = Math.max(0, lastProgressWidth);
+  if (width > 0) {
+    process.stderr.write(`\r${' '.repeat(width)}\r`);
+  }
+  lastProgressActive = false;
+  lastProgressWidth = 0;
+}
+
 export function showProgress(step, i, total) {
+  if (structuredEnabled) return;
   const pct = ((i / total) * 100).toFixed(1);
-  process.stderr.write(`\r${step.padEnd(40)} ${i}/${total} (${pct}%)`.padEnd(70));
-  if (i === total) process.stderr.write('\n');
+  const line = `${step} ${i}/${total} (${pct}%)`;
+  const isTty = process.stderr.isTTY;
+  if (isTty) {
+    process.stderr.write(`\r${line}\x1b[K`);
+    lastProgressActive = true;
+    lastProgressWidth = line.length;
+    if (i === total) {
+      process.stderr.write('\n');
+      lastProgressActive = false;
+      lastProgressWidth = 0;
+    }
+  } else {
+    process.stderr.write(`${line}\n`);
+    lastProgressActive = false;
+    lastProgressWidth = 0;
+  }
 }
 
 /**
  * Write a log message to stderr.
  * @param {string} msg
+ * @param {object} [meta]
+ */
+export function log(msg, meta = null) {
+  if (logger) {
+    logger.info({ ...logContext, ...(meta || {}) }, msg);
+    recordEvent('info', msg, meta);
+    return;
+  }
+  recordEvent('info', msg, meta);
+  clearProgressLine();
+  process.stderr.write(`\n${msg}\n`);
+}
+
+/**
+ * Write a single log line to stderr without extra spacing.
+ * @param {string} msg
+ * @param {object} [meta]
+ */
+export function logLine(msg, meta = null) {
+  if (logger) {
+    logger.info({ ...logContext, ...(meta || {}) }, msg);
+    recordEvent('info', msg, meta);
+    return;
+  }
+  recordEvent('info', msg, meta);
+  clearProgressLine();
+  process.stderr.write(`${msg}\n`);
+}
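
// Usage sketch for the structured-logging API above; the import path is
// assumed relative to the repo root, and the context keys are illustrative.
import {
  configureLogger,
  updateLogContext,
  logLine,
  logError,
  getRecentLogEvents
} from './src/shared/progress.js';

configureLogger({
  enabled: true,
  level: 'info',
  pretty: process.stderr.isTTY,
  ringMax: 500,
  context: { component: 'indexer' }
});
updateLogContext({ repo: 'sample' });
logLine('index started', { files: 120 });
logError('bundle missing', { file: 'src/a.js' });
// The ring buffer keeps the most recent events regardless of transport:
const tail = getRecentLogEvents(); // [{ ts, level, msg, meta }, ...]

+
+/**
+ * Write an error log message.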
+ * @param {string} msg + * @param {object} [meta] */ -export function log(msg) { +export function logError(msg, meta = null) { + if (logger) { + logger.error({ ...logContext, ...(meta || {}) }, msg); + recordEvent('error', msg, meta); + return; + } + recordEvent('error', msg, meta); + clearProgressLine(); process.stderr.write(`\n${msg}\n`); } diff --git a/src/shared/safe-regex.js b/src/shared/safe-regex.js new file mode 100644 index 000000000..4b3659168 --- /dev/null +++ b/src/shared/safe-regex.js @@ -0,0 +1,173 @@ +import { compileRe2js } from './safe-regex/backends/re2js.js'; +import { compileRe2, isRe2Available } from './safe-regex/backends/re2.js'; + +export const DEFAULT_SAFE_REGEX_CONFIG = { + engine: 'auto', + maxPatternLength: 512, + maxInputLength: 10000, + maxProgramSize: 2000, + timeoutMs: 25, + flags: '' +}; + +const normalizeLimit = (value, fallback) => { + if (value === 0 || value === false) return null; + const parsed = Number(value); + if (Number.isFinite(parsed) && parsed > 0) return Math.floor(parsed); + return fallback; +}; + +const normalizeEngine = (raw, fallback) => { + if (!raw) return fallback; + const key = String(raw).trim().toLowerCase(); + if (key === 'auto') return 'auto'; + if (key === 're2') return 're2'; + if (key === 're2js') return 're2js'; + return fallback; +}; + +const normalizeFlags = (raw) => { + if (!raw) return ''; + const seen = new Set(); + const out = []; + for (const ch of String(raw)) { + if (!'gimsuy'.includes(ch) || seen.has(ch)) continue; + seen.add(ch); + out.push(ch); + } + return out.join(''); +}; + +const mergeFlags = (explicit, fallback) => { + const primary = normalizeFlags(explicit); + const defaults = normalizeFlags(fallback); + if (!defaults) return primary; + if (!primary) return defaults; + const merged = []; + const seen = new Set(); + for (const ch of `${defaults}${primary}`) { + if (seen.has(ch)) continue; + seen.add(ch); + merged.push(ch); + } + return merged.join(''); +}; + +class SafeRegex { + constructor(backend, source, flags, config, requestedEngine) { + this.backend = backend; + this.engine = backend?.engine || 're2js'; + this.requestedEngine = requestedEngine || 'auto'; + this.source = source; + this.flags = flags; + this.config = config; + this.lastIndex = 0; + this.isGlobal = flags.includes('g'); + this.isSticky = flags.includes('y'); + this.usesLastIndex = this.isGlobal || this.isSticky; + } + + exec(input) { + const text = String(input ?? ''); + if (!text) { + if (this.usesLastIndex) this.lastIndex = 0; + return null; + } + + const { maxInputLength, timeoutMs } = this.config || {}; + if (maxInputLength && text.length > maxInputLength) { + if (this.usesLastIndex) this.lastIndex = 0; + return null; + } + + const startIndex = this.usesLastIndex && Number.isFinite(this.lastIndex) + ? Math.max(0, this.lastIndex) + : 0; + + if (this.usesLastIndex && startIndex > text.length) { + this.lastIndex = 0; + return null; + } + + const match = this.backend.match(text, startIndex, { timeoutMs, sticky: this.isSticky }); + if (!match) { + if (this.usesLastIndex) this.lastIndex = 0; + return null; + } + + const groups = Array.isArray(match.groups) ? 
match.groups : []; + const result = groups.slice(); + result.index = match.index; + result.input = text; + + if (this.usesLastIndex) { + if (Number.isFinite(match.nextLastIndex)) { + this.lastIndex = match.nextLastIndex; + } else { + let next = match.end; + if (Number.isFinite(next) && next === match.index) { + next = Math.min(text.length, next + 1); + } + this.lastIndex = Number.isFinite(next) ? next : 0; + } + } + + return result; + } + + test(input) { + return !!this.exec(input); + } +} + +export function normalizeSafeRegexConfig(raw = {}, defaults = {}) { + const base = { ...DEFAULT_SAFE_REGEX_CONFIG, ...defaults }; + const config = raw && typeof raw === 'object' ? raw : {}; + const hasFlagOverride = Object.prototype.hasOwnProperty.call(config, 'flags'); + const hasEngineOverride = Object.prototype.hasOwnProperty.call(config, 'engine'); + return { + engine: normalizeEngine(hasEngineOverride ? config.engine : base.engine, base.engine), + maxPatternLength: normalizeLimit(config.maxPatternLength, base.maxPatternLength), + maxInputLength: normalizeLimit(config.maxInputLength, base.maxInputLength), + maxProgramSize: normalizeLimit(config.maxProgramSize, base.maxProgramSize), + timeoutMs: normalizeLimit(config.timeoutMs, base.timeoutMs), + flags: normalizeFlags(hasFlagOverride ? config.flags : base.flags) + }; +} + +let warnedMissingRe2 = false; +const warnMissingRe2Once = () => { + if (warnedMissingRe2) return; + warnedMissingRe2 = true; + console.warn('SafeRegex: engine "re2" requested but optional dependency "re2" is not available; falling back to re2js.'); +}; + +export function createSafeRegex(pattern, flags = '', config = {}) { + const normalized = normalizeSafeRegexConfig(config); + const source = String(pattern ?? ''); + if (!source) return null; + if (normalized.maxPatternLength && source.length > normalized.maxPatternLength) { + return null; + } + + const combinedFlags = mergeFlags(flags, normalized.flags); + const requestedEngine = normalized.engine || 'auto'; + + // Try native RE2 if requested (auto or explicit) and available. + if (requestedEngine !== 're2js') { + const nativeAvailable = isRe2Available(); + if (nativeAvailable) { + const backend = compileRe2(source, combinedFlags); + if (backend) return new SafeRegex(backend, source, combinedFlags, normalized, requestedEngine); + } else if (requestedEngine === 're2') { + warnMissingRe2Once(); + } + } + + // Fall back to RE2JS. + const backend = compileRe2js(source, combinedFlags, normalized); + if (!backend) return null; + return new SafeRegex(backend, source, combinedFlags, normalized, requestedEngine); +} + +export const isNativeRe2Available = isRe2Available; diff --git a/src/shared/safe-regex/backends/re2.js b/src/shared/safe-regex/backends/re2.js new file mode 100644 index 000000000..cc114988d --- /dev/null +++ b/src/shared/safe-regex/backends/re2.js @@ -0,0 +1,64 @@ +import { createRequire } from 'node:module'; + +let cachedRe2 = undefined; + +const loadRe2 = () => { + if (cachedRe2 !== undefined) return cachedRe2; + try { + const require = createRequire(import.meta.url); + const mod = require('re2'); + const RE2 = (mod && typeof mod === 'object' && 'default' in mod) ? mod.default : mod; + cachedRe2 = typeof RE2 === 'function' ? 
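
// Usage sketch for createSafeRegex (above): the factory returns null rather
// than throwing when the pattern is empty, too long, or fails to compile, so
// callers should guard. The limit overrides are illustrative; import path is
// assumed relative to the repo root.
import { createSafeRegex } from './src/shared/safe-regex.js';

const re = createSafeRegex('TODO\\(([a-z]+)\\)', 'g', {
  maxInputLength: 5000,
  timeoutMs: 10
});
if (re) {
  let m;
  while ((m = re.exec('TODO(alice) and TODO(bob)')) !== null) {
    console.log(m[1], 'at', m.index); // alice at 0, bob at 16
  }
  // exec resets lastIndex to 0 on a miss, so the loop terminates cleanly.
}
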
RE2 : null; + } catch { + cachedRe2 = null; + } + return cachedRe2; +}; + +export const isRe2Available = () => Boolean(loadRe2()); + +export const compileRe2 = (source, flags) => { + const RE2 = loadRe2(); + if (!RE2) return null; + try { + const compiled = new RE2(source, flags); + if (!compiled) return null; + return { + engine: 're2', + source, + flags, + match(text, startIndex, { timeoutMs = 0, sticky = false } = {}) { + const started = timeoutMs ? Date.now() : 0; + + // Keep lastIndex behavior consistent with JS RegExp semantics: + // only meaningful for global (g) or sticky (y). + const usesLastIndex = flags.includes('g') || flags.includes('y') || sticky; + if (typeof compiled.lastIndex === 'number') { + compiled.lastIndex = usesLastIndex ? startIndex : 0; + } + + const result = compiled.exec(text); + + if (timeoutMs && Date.now() - started > timeoutMs) return null; + if (!result) return null; + + const index = Number.isFinite(result.index) ? result.index : 0; + + // Some RE2 builds may ignore 'y'; enforce sticky if requested. + if (sticky && index !== startIndex) return null; + + const groups = Array.from(result); + const matchText = groups[0] ?? ''; + const end = index + String(matchText).length; + + const nextLastIndex = (typeof compiled.lastIndex === 'number' && Number.isFinite(compiled.lastIndex)) + ? compiled.lastIndex + : null; + + return { groups, index, end, nextLastIndex }; + } + }; + } catch { + return null; + } +}; diff --git a/src/shared/safe-regex/backends/re2js.js b/src/shared/safe-regex/backends/re2js.js new file mode 100644 index 000000000..6c9391f9c --- /dev/null +++ b/src/shared/safe-regex/backends/re2js.js @@ -0,0 +1,43 @@ +import { RE2JS } from 're2js'; + +const toFlagMask = (flags) => { + let mask = 0; + if (flags.includes('i')) mask |= RE2JS.CASE_INSENSITIVE; + if (flags.includes('m')) mask |= RE2JS.MULTILINE; + if (flags.includes('s')) mask |= RE2JS.DOTALL; + return mask; +}; + +export const compileRe2js = (source, flags, config = {}) => { + const mask = toFlagMask(flags); + try { + const translated = RE2JS.translateRegExp(source); + const compiled = RE2JS.compile(translated, mask); + if (config.maxProgramSize && compiled.programSize() > config.maxProgramSize) { + return null; + } + const groupCount = compiled.groupCount(); + return { + engine: 're2js', + source, + flags, + match(text, startIndex, { timeoutMs = 0, sticky = false } = {}) { + const started = timeoutMs ? 
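
// The native backend above lazily probes for the optional "re2" package and
// caches the result (including failure) so repeated calls stay cheap. A
// condensed sketch of that probe-and-cache shape:
import { createRequire } from 'node:module';

let cached; // undefined = not probed yet, null = unavailable
function loadOptionalCtor(name) {
  if (cached !== undefined) return cached;
  try {
    const mod = createRequire(import.meta.url)(name);
    const ctor = (mod && typeof mod === 'object' && 'default' in mod) ? mod.default : mod;
    cached = typeof ctor === 'function' ? ctor : null;
  } catch {
    cached = null; // remember the miss; never re-probe
  }
  return cached;
}
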
Date.now() : 0; + const matcher = compiled.matcher(text); + const found = matcher.find(startIndex); + if (timeoutMs && Date.now() - started > timeoutMs) return null; + if (!found) return null; + const index = matcher.start(); + if (sticky && index !== startIndex) return null; + const end = matcher.end(); + const groups = new Array(groupCount + 1); + for (let i = 0; i <= groupCount; i += 1) { + groups[i] = matcher.group(i); + } + return { groups, index, end }; + } + }; + } catch { + return null; + } +}; diff --git a/src/shared/stable-json.js b/src/shared/stable-json.js new file mode 100644 index 000000000..ff75a57ac --- /dev/null +++ b/src/shared/stable-json.js @@ -0,0 +1,17 @@ +export function stableStringify(value) { + return JSON.stringify(normalize(value)); +} + +function normalize(value) { + if (Array.isArray(value)) { + return value.map((entry) => normalize(entry)); + } + if (!value || typeof value !== 'object' || value.constructor !== Object) { + return value; + } + const out = {}; + for (const key of Object.keys(value).sort()) { + out[key] = normalize(value[key]); + } + return out; +} diff --git a/src/shared/threads.js b/src/shared/threads.js new file mode 100644 index 000000000..a933cb66c --- /dev/null +++ b/src/shared/threads.js @@ -0,0 +1,77 @@ +import os from 'node:os'; + +/** + * Resolve thread limits and concurrency defaults. + * @param {object} input + * @returns {object} + */ +export function resolveThreadLimits(input = {}) { + const { + argv = {}, + rawArgv = [], + envConfig = {}, + configConcurrency = null, + importConcurrencyConfig = null, + ioConcurrencyCapConfig = null, + defaultMultiplier = 4 + } = input; + const cpuCount = os.cpus().length; + const defaultFileConcurrency = Math.max(1, Math.min(cpuCount, 16)); + const defaultThreads = Math.max(1, defaultFileConcurrency * defaultMultiplier); + const rawCliThreads = Number(argv.threads); + const envThreads = Number(envConfig.threads); + const threadsArgPresent = Array.isArray(rawArgv) + && rawArgv.some((arg) => arg === '--threads' || String(arg).startsWith('--threads=')); + const envThreadsProvided = Number.isFinite(envThreads) && envThreads > 0; + const cliThreadsProvided = threadsArgPresent + || (Number.isFinite(rawCliThreads) && rawCliThreads !== defaultThreads); + const cliConcurrency = envThreadsProvided + ? envThreads + : (cliThreadsProvided ? rawCliThreads : null); + const requestedConcurrency = Number.isFinite(cliConcurrency) + ? Math.floor(cliConcurrency) + : Number.isFinite(configConcurrency) + ? Math.floor(configConcurrency) + : defaultFileConcurrency; + const cappedConcurrency = Math.max(1, Math.min(cpuCount, requestedConcurrency)); + const maxConcurrencyCap = Math.max(defaultFileConcurrency, cappedConcurrency); + const fileConcurrency = Math.max(1, Math.min(maxConcurrencyCap, cappedConcurrency)); + const importConcurrency = Math.max( + 1, + Math.min( + maxConcurrencyCap, + Number.isFinite(cliConcurrency) + ? fileConcurrency + : Number.isFinite(Number(importConcurrencyConfig)) + ? Number(importConcurrencyConfig) + : fileConcurrency + ) + ); + const ioPlatformCap = process.platform === 'win32' ? 32 : 64; + const ioBase = Math.max(fileConcurrency, importConcurrency); + const configuredIoCap = Number.isFinite(Number(ioConcurrencyCapConfig)) && Number(ioConcurrencyCapConfig) > 0 + ? Math.floor(Number(ioConcurrencyCapConfig)) + : null; + const ioDerived = Math.max(1, Math.min(ioPlatformCap, ioBase * 4)); + const ioConcurrency = configuredIoCap !== null + ? 
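
// stableStringify (above) sorts plain-object keys recursively, so logically
// equal objects serialize to byte-identical strings, which makes it suitable
// for deterministic cache keys. Import path assumed relative to the repo root.
import { stableStringify } from './src/shared/stable-json.js';

stableStringify({ b: 1, a: { d: 2, c: 3 } });
// -> '{"a":{"c":3,"d":2},"b":1}'
stableStringify([{ z: 1, y: 2 }]);
// -> '[{"y":2,"z":1}]' (arrays keep their order; only object keys sort)
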
Math.max(1, Math.min(ioDerived, configuredIoCap)) + : ioDerived; + const cpuConcurrency = Math.max(1, Math.min(maxConcurrencyCap, fileConcurrency)); + const source = envThreadsProvided + ? 'env' + : cliThreadsProvided + ? 'cli' + : Number.isFinite(configConcurrency) + ? 'config' + : 'default'; + return { + cpuCount, + defaultThreads, + maxConcurrencyCap, + fileConcurrency, + importConcurrency, + ioConcurrency, + cpuConcurrency, + source + }; +} diff --git a/src/shared/tokenize.js b/src/shared/tokenize.js index 7e2470302..5b8b7e13b 100644 --- a/src/shared/tokenize.js +++ b/src/shared/tokenize.js @@ -1,3 +1,4 @@ +import AhoCorasick from 'aho-corasick'; import Snowball from 'snowball-stemmers'; const stemmer = Snowball.newStemmer('english'); @@ -32,32 +33,254 @@ export function splitId(s) { } /** - * Split a token into dictionary words when possible. - * @param {string} token - * @param {Set} dict + * Split an identifier into tokens while preserving case. + * @param {string} s * @returns {string[]} */ -export function splitWordsWithDict(token, dict) { - if (!dict || dict.size === 0) return [token]; +export function splitIdPreserveCase(s) { + return s + .replace(/([a-z])([A-Z])/g, '$1 $2') + .replace(/[_\-]+/g, ' ') + .split(/[^a-zA-Z0-9]+/u) + .flatMap((tok) => tok.split(/(?<=.)(?=[A-Z])/)) + .filter(Boolean); +} + +export function extractPunctuationTokens(text) { + if (!text) return []; + const tokens = text.match(/[=<>!:+\-*/%&|^~.?]{1,4}|[()[\]{}.,;:]/g); + return tokens ? tokens.filter(Boolean) : []; +} + +const DEFAULT_DICT_SEGMENTATION = { + mode: 'auto', + dpMaxTokenLength: 32 +}; + +const VALID_DICT_SEGMENT_MODES = new Set(['auto', 'greedy', 'dp', 'aho']); +const MAX_AHO_DICT_SIZE = 200000; + +const normalizeDictSegmentation = (options = {}) => { + const modeRaw = typeof options.segmentation === 'string' + ? options.segmentation.toLowerCase() + : ''; + const mode = VALID_DICT_SEGMENT_MODES.has(modeRaw) + ? modeRaw + : DEFAULT_DICT_SEGMENTATION.mode; + const dpMaxTokenLengthRaw = Number(options.dpMaxTokenLength); + const dpMaxTokenLength = Number.isFinite(dpMaxTokenLengthRaw) + ? Math.max(4, Math.floor(dpMaxTokenLengthRaw)) + : DEFAULT_DICT_SEGMENTATION.dpMaxTokenLength; + return { mode, dpMaxTokenLength }; +}; + +const getDictMaxLen = (dict) => { + if (!dict || dict.size === 0) return 0; + const cached = dict.__maxTokenLength; + if (Number.isFinite(cached) && cached > 0) return cached; + const altMax = Number.isFinite(dict.maxLen) && dict.maxLen > 0 ? 
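
// Usage sketch for resolveThreadLimits: env threads win over CLI, which wins
// over config, which wins over the CPU-derived default. Values shown are
// illustrative; import path assumed relative to the repo root.
import { resolveThreadLimits } from './src/shared/threads.js';

const limits = resolveThreadLimits({
  argv: { threads: 8 },
  rawArgv: ['--threads', '8'],
  envConfig: {},            // no env override in this sketch
  configConcurrency: 4,
  ioConcurrencyCapConfig: 16
});
// limits.source === 'cli'
// limits.fileConcurrency <= os.cpus().length
// limits.ioConcurrency <= 16 (config cap), itself under the platform cap
// (32 on win32, 64 elsewhere)
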
dict.maxLen : 0; + if (altMax) return altMax; + if (dict.__sharedDict) return 0; + if (typeof dict[Symbol.iterator] !== 'function') return 0; + let maxLen = 0; + for (const word of dict) { + if (typeof word === 'string' && word.length > maxLen) maxLen = word.length; + } + dict.__maxTokenLength = maxLen; + return maxLen; +}; + +const buildDictAhoMatcher = (dict) => { + if (!dict || dict.__sharedDict) return null; + if (typeof dict[Symbol.iterator] !== 'function') return null; + if (Number.isFinite(dict.size) && dict.size > MAX_AHO_DICT_SIZE) return null; + const matcher = new AhoCorasick(); + const words = []; + for (const word of dict) { + if (typeof word !== 'string' || !word) continue; + words.push(word); + matcher.add(word, word); + } + if (!words.length) return null; + matcher.build_fail(); + return { matcher, words }; +}; + +const getDictAhoMatcher = (dict) => { + if (!dict || dict.size === 0 || dict.__sharedDict) return null; + const cached = dict.__ahoMatcher; + if (cached && cached.size === dict.size) return cached.matcher; + const built = buildDictAhoMatcher(dict); + if (!built) return null; + dict.__ahoMatcher = { + matcher: built.matcher, + size: dict.size, + words: built.words + }; + return built.matcher; +}; + +const buildAhoMatches = (token, dict) => { + const matcher = getDictAhoMatcher(dict); + if (!matcher || !token) return null; + const matchesByStart = Array.from({ length: token.length }, () => []); + matcher.search(token, (value, _data, offset) => { + if (!value) return; + const start = Number(offset); + if (!Number.isFinite(start) || start < 0) return; + const end = start + value.length; + if (end > token.length) return; + matchesByStart[start].push({ word: value, end }); + }); + return matchesByStart; +}; + +const findLongestMatch = (token, start, dict, maxLen) => { + const endLimit = Math.min(token.length, start + maxLen); + for (let end = endLimit; end > start; end--) { + const sub = token.slice(start, end); + if (dict.has(sub)) return sub; + } + return null; +}; + +const hasDictMatchAt = (token, start, dict, maxLen) => !!findLongestMatch(token, start, dict, maxLen); + +const splitWordsWithDictGreedy = (token, dict, maxLen) => { const result = []; let i = 0; while (i < token.length) { - let found = false; - for (let j = token.length; j > i; j--) { - const sub = token.slice(i, j); - if (dict.has(sub)) { - result.push(sub); - i = j; - found = true; - break; + const match = findLongestMatch(token, i, dict, maxLen); + if (match) { + result.push(match); + i += match.length; + continue; + } + const unknownStart = i; + i += 1; + while (i < token.length && !hasDictMatchAt(token, i, dict, maxLen)) { + i += 1; + } + result.push(token.slice(unknownStart, i)); + } + return result; +}; + +const pickBetterSegment = (current, candidate) => { + if (!current) return candidate; + if (candidate.matchChars > current.matchChars) return candidate; + if (candidate.matchChars < current.matchChars) return current; + if (candidate.segments < current.segments) return candidate; + if (candidate.segments > current.segments) return current; + if (candidate.isDict && !current.isDict) return candidate; + return current; +}; + +const splitWordsWithDictDp = (token, dict, maxLen, matchesByStart = null) => { + const n = token.length; + const best = new Array(n + 1).fill(null); + best[n] = { matchChars: 0, segments: 0, next: n, token: '', isDict: false }; + for (let i = n - 1; i >= 0; i--) { + let bestChoice = null; + const fallback = best[i + 1]; + if (fallback) { + bestChoice = 
pickBetterSegment(bestChoice, { + matchChars: fallback.matchChars, + segments: fallback.segments + 1, + next: i + 1, + token: token.slice(i, i + 1), + isDict: false + }); + } + if (matchesByStart) { + const matches = matchesByStart[i]; + if (matches && matches.length) { + for (const match of matches) { + const nextScore = best[match.end]; + if (!nextScore) continue; + bestChoice = pickBetterSegment(bestChoice, { + matchChars: nextScore.matchChars + match.word.length, + segments: nextScore.segments + 1, + next: match.end, + token: match.word, + isDict: true + }); + } + } + } else { + const endLimit = Math.min(n, i + maxLen); + for (let end = endLimit; end > i; end--) { + const word = token.slice(i, end); + if (!dict.has(word)) continue; + const nextScore = best[end]; + if (!nextScore) continue; + bestChoice = pickBetterSegment(bestChoice, { + matchChars: nextScore.matchChars + word.length, + segments: nextScore.segments + 1, + next: end, + token: word, + isDict: true + }); } } - if (!found) { - result.push(token[i]); - i++; + best[i] = bestChoice; + } + const segments = []; + let idx = 0; + while (idx < n && best[idx]) { + const entry = best[idx]; + segments.push(entry); + idx = entry.next; + } + const result = []; + let buffer = ''; + for (const seg of segments) { + if (!seg.isDict) { + buffer += seg.token; + continue; + } + if (buffer) { + result.push(buffer); + buffer = ''; } + result.push(seg.token); } + if (buffer) result.push(buffer); return result; +}; + +const scoreSegments = (segments, dict) => segments.reduce((sum, seg) => ( + dict.has(seg) ? sum + seg.length : sum +), 0); + +/** + * Split a token into dictionary words when possible. + * @param {string} token + * @param {{size:number,has:function}|Set} dict + * @param {{segmentation?:string,dpMaxTokenLength?:number}} [options] + * @returns {string[]} + */ +export function splitWordsWithDict(token, dict, options = {}) { + if (!dict || dict.size === 0 || typeof dict.has !== 'function') return [token]; + if (!token) return []; + const { mode, dpMaxTokenLength } = normalizeDictSegmentation(options); + const maxLen = getDictMaxLen(dict); + if (!maxLen) return [token]; + const greedy = splitWordsWithDictGreedy(token, dict, maxLen); + if (mode === 'greedy') return greedy; + const shouldUseDp = token.length <= dpMaxTokenLength; + const matchesByStart = shouldUseDp && ['auto', 'dp', 'aho'].includes(mode) + ? buildAhoMatches(token, dict) + : null; + if (mode === 'dp' || mode === 'aho') { + if (!shouldUseDp) return greedy; + return splitWordsWithDictDp(token, dict, maxLen, matchesByStart); + } + if (shouldUseDp) { + const dp = splitWordsWithDictDp(token, dict, maxLen, matchesByStart); + if (scoreSegments(dp, dict) > scoreSegments(greedy, dict)) return dp; + } + return greedy; } /** diff --git a/src/sqlite/utils.js b/src/sqlite/utils.js deleted file mode 100644 index 07dcaf186..000000000 --- a/src/sqlite/utils.js +++ /dev/null @@ -1,90 +0,0 @@ -import fs from 'node:fs'; -import path from 'node:path'; - -/** - * Split an array into fixed-size chunks. - * @param {Array} items - * @param {number} [size] - * @returns {Array>} - */ -export function chunkArray(items, size = 900) { - const chunks = []; - for (let i = 0; i < items.length; i += size) { - chunks.push(items.slice(i, i + size)); - } - return chunks; -} - -/** - * Return the set of table names in a SQLite database. 
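
// Example of the segmentation modes above on a Set-backed dictionary.
// Greedy takes the longest match at each position; 'dp' maximizes total
// matched characters, so it recovers when an early long match strands the
// rest of the token. Import path assumed relative to the repo root.
import { splitWordsWithDict } from './src/shared/tokenize.js';

const dict = new Set(['get', 'getu', 'user', 'name']);
splitWordsWithDict('getusername', dict, { segmentation: 'greedy' });
// -> ['getu', 'ser', 'name']  (longest-first grabs 'getu', stranding 'ser')
splitWordsWithDict('getusername', dict, { segmentation: 'dp' });
// -> ['get', 'user', 'name']  (11 matched chars beats greedy's 8)
// In the default 'auto' mode the dp result wins here too, via scoreSegments.
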
- * @param {import('better-sqlite3').Database} db - * @returns {Set} - */ -export function getTableNames(db) { - const rows = db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all(); - return new Set(rows.map((row) => row.name)); -} - -/** - * Check that all required tables exist. - * @param {import('better-sqlite3').Database} db - * @param {string[]} requiredTables - * @returns {boolean} - */ -export function hasRequiredTables(db, requiredTables) { - const tableNames = getTableNames(db); - return requiredTables.every((name) => tableNames.has(name)); -} - -/** - * Normalize a file path to POSIX separators. - * @param {string} value - * @returns {string} - */ -export function normalizeFilePath(value) { - if (typeof value !== 'string') return value; - return value.replace(/\\/g, '/'); -} - -/** - * Read and parse JSON from disk. - * @param {string} filePath - * @returns {any} - */ -export function readJson(filePath) { - return JSON.parse(fs.readFileSync(filePath, 'utf8')); -} - -/** - * Read JSON from disk if it exists; otherwise return null. - * @param {string} dir - * @param {string} name - * @returns {any|null} - */ -export function loadOptional(dir, name) { - const target = path.join(dir, name); - if (!fs.existsSync(target)) return null; - return readJson(target); -} - -/** - * Load file-backed index artifacts from a directory. - * @param {string} dir - * @param {string} modelId - * @returns {object|null} - */ -export function loadIndex(dir, modelId) { - const chunkMetaPath = path.join(dir, 'chunk_meta.json'); - if (!fs.existsSync(chunkMetaPath)) return null; - const chunkMeta = readJson(chunkMetaPath); - const denseVec = loadOptional(dir, 'dense_vectors_uint8.json'); - if (denseVec && !denseVec.model) denseVec.model = modelId || null; - return { - chunkMeta, - denseVec, - phraseNgrams: loadOptional(dir, 'phrase_ngrams.json'), - chargrams: loadOptional(dir, 'chargram_postings.json'), - minhash: loadOptional(dir, 'minhash_signatures.json'), - tokenPostings: loadOptional(dir, 'token_postings.json') - }; -} diff --git a/src/storage/backend-policy.js b/src/storage/backend-policy.js new file mode 100644 index 000000000..0f90a48f7 --- /dev/null +++ b/src/storage/backend-policy.js @@ -0,0 +1,218 @@ +export function resolveBackendPolicy({ + backendArg, + sqliteScoreModeConfig = false, + sqliteConfigured = true, + sqliteAvailable = false, + lmdbConfigured = true, + lmdbAvailable = false, + sqliteAutoChunkThreshold = 0, + sqliteAutoArtifactBytes = 0, + needsSqlite = true, + chunkCounts = [], + artifactBytes = [] +} = {}) { + const normalized = typeof backendArg === 'string' ? backendArg.toLowerCase() : ''; + const backendAuto = !normalized || normalized === 'auto'; + const sqliteFtsRequested = normalized === 'sqlite-fts' + || normalized === 'fts' + || (backendAuto && sqliteScoreModeConfig === true); + const backendForcedSqlite = normalized === 'sqlite' || sqliteFtsRequested; + const backendForcedLmdb = normalized === 'lmdb'; + const backendForcedMemory = normalized === 'memory'; + const backendDisabled = normalized + && !backendAuto + && !backendForcedSqlite + && !backendForcedLmdb + && !backendForcedMemory; + + const counts = Array.isArray(chunkCounts) + ? chunkCounts.filter((count) => Number.isFinite(count)) + : []; + const maxChunkCount = counts.length ? Math.max(...counts) : null; + const byteTotals = Array.isArray(artifactBytes) + ? artifactBytes.filter((count) => Number.isFinite(count)) + : []; + const totalArtifactBytes = byteTotals.length + ? 
byteTotals.reduce((sum, next) => sum + next, 0) + : null; + + const policy = { + requested: normalized || 'auto', + sqliteAutoChunkThreshold, + sqliteAutoArtifactBytes, + maxChunkCount, + totalArtifactBytes, + lmdbAvailable, + lmdbConfigured + }; + + if (backendDisabled) { + return { + useSqlite: false, + useLmdb: false, + backendLabel: 'memory', + sqliteFtsRequested: false, + backendForcedSqlite: false, + backendForcedLmdb: false, + backendForcedMemory: false, + backendDisabled: true, + reason: 'unknown backend requested', + policy + }; + } + + if (!needsSqlite) { + return { + useSqlite: false, + useLmdb: false, + backendLabel: 'memory', + sqliteFtsRequested: false, + backendForcedSqlite: false, + backendForcedLmdb, + backendForcedMemory, + backendDisabled: false, + reason: 'no sqlite needed for selected mode', + policy + }; + } + + if (backendForcedLmdb && !lmdbAvailable) { + return { + useSqlite: false, + useLmdb: false, + backendLabel: 'lmdb', + sqliteFtsRequested: false, + backendForcedSqlite: false, + backendForcedLmdb, + backendForcedMemory, + backendDisabled: false, + reason: 'lmdb indexes missing', + error: 'LMDB backend requested but index not found', + policy + }; + } + + if (backendForcedLmdb) { + return { + useSqlite: false, + useLmdb: true, + backendLabel: 'lmdb', + sqliteFtsRequested: false, + backendForcedSqlite: false, + backendForcedLmdb, + backendForcedMemory, + backendDisabled: false, + reason: 'lmdb backend forced by flag', + policy + }; + } + + if (backendForcedSqlite && !sqliteAvailable) { + return { + useSqlite: false, + useLmdb: false, + backendLabel: sqliteFtsRequested ? 'sqlite-fts' : 'sqlite', + sqliteFtsRequested, + backendForcedSqlite, + backendForcedLmdb, + backendForcedMemory, + backendDisabled: false, + reason: 'sqlite indexes missing', + error: 'SQLite backend requested but index not found', + policy + }; + } + + if (backendForcedSqlite) { + return { + useSqlite: true, + useLmdb: false, + backendLabel: sqliteFtsRequested ? 'sqlite-fts' : 'sqlite', + sqliteFtsRequested, + backendForcedSqlite, + backendForcedLmdb, + backendForcedMemory, + backendDisabled: false, + reason: 'sqlite backend forced by flag', + policy + }; + } + + if (backendForcedMemory) { + return { + useSqlite: false, + useLmdb: false, + backendLabel: 'memory', + sqliteFtsRequested: false, + backendForcedSqlite: false, + backendForcedLmdb, + backendForcedMemory: true, + backendDisabled: false, + reason: 'memory backend forced by flag', + policy + }; + } + + if (!sqliteConfigured || !sqliteAvailable) { + if (lmdbConfigured && lmdbAvailable) { + return { + useSqlite: false, + useLmdb: true, + backendLabel: 'lmdb', + sqliteFtsRequested: false, + backendForcedSqlite: false, + backendForcedLmdb: false, + backendForcedMemory: false, + backendDisabled: false, + reason: sqliteConfigured ? 'sqlite indexes unavailable; using lmdb' : 'sqlite disabled; using lmdb', + policy + }; + } + return { + useSqlite: false, + useLmdb: false, + backendLabel: 'memory', + sqliteFtsRequested: false, + backendForcedSqlite: false, + backendForcedLmdb, + backendForcedMemory: false, + backendDisabled: false, + reason: sqliteConfigured ? 
'sqlite indexes unavailable' : 'sqlite disabled', + policy + }; + } + + let autoUseSqlite = true; + let autoReason = 'auto default'; + const thresholdsEnabled = sqliteAutoChunkThreshold > 0 || sqliteAutoArtifactBytes > 0; + if (thresholdsEnabled) { + const hits = []; + if (sqliteAutoChunkThreshold > 0 && Number.isFinite(maxChunkCount)) { + hits.push(maxChunkCount >= sqliteAutoChunkThreshold ? 'chunkCount' : null); + } + if (sqliteAutoArtifactBytes > 0 && Number.isFinite(totalArtifactBytes)) { + hits.push(totalArtifactBytes >= sqliteAutoArtifactBytes ? 'artifactBytes' : null); + } + const hitReasons = hits.filter(Boolean); + if (hitReasons.length) { + autoUseSqlite = true; + autoReason = `auto threshold met (${hitReasons.join(', ')})`; + } else if (hits.length) { + autoUseSqlite = false; + autoReason = 'auto threshold not met'; + } + } + + return { + useSqlite: autoUseSqlite, + useLmdb: false, + backendLabel: autoUseSqlite ? (sqliteFtsRequested ? 'sqlite-fts' : 'sqlite') : 'memory', + sqliteFtsRequested, + backendForcedSqlite: false, + backendForcedLmdb: false, + backendForcedMemory: false, + backendDisabled: false, + reason: autoReason, + policy + }; +} diff --git a/src/storage/lmdb/schema.js b/src/storage/lmdb/schema.js new file mode 100644 index 000000000..3b65652ed --- /dev/null +++ b/src/storage/lmdb/schema.js @@ -0,0 +1,31 @@ +export const LMDB_SCHEMA_VERSION = 1; + +export const LMDB_META_KEYS = { + schemaVersion: 'meta:schemaVersion', + createdAt: 'meta:createdAt', + mode: 'meta:mode', + artifacts: 'meta:artifacts', + chunkCount: 'meta:chunkCount', + sourceIndex: 'meta:sourceIndex' +}; + +export const LMDB_ARTIFACT_KEYS = { + chunkMeta: 'artifact:chunk_meta', + tokenPostings: 'artifact:token_postings', + fileMeta: 'artifact:file_meta', + fileRelations: 'artifact:file_relations', + repoMap: 'artifact:repo_map', + filterIndex: 'artifact:filter_index', + fieldPostings: 'artifact:field_postings', + fieldTokens: 'artifact:field_tokens', + phraseNgrams: 'artifact:phrase_ngrams', + chargramPostings: 'artifact:chargram_postings', + minhashSignatures: 'artifact:minhash_signatures', + denseVectors: 'artifact:dense_vectors_uint8', + denseVectorsDoc: 'artifact:dense_vectors_doc_uint8', + denseVectorsCode: 'artifact:dense_vectors_code_uint8', + denseHnswMeta: 'artifact:dense_vectors_hnsw_meta', + indexState: 'artifact:index_state' +}; + +export const LMDB_ARTIFACT_LIST = Object.values(LMDB_ARTIFACT_KEYS); diff --git a/src/sqlite/build-helpers.js b/src/storage/sqlite/build-helpers.js similarity index 90% rename from src/sqlite/build-helpers.js rename to src/storage/sqlite/build-helpers.js index 663f8013a..8b563473f 100644 --- a/src/sqlite/build-helpers.js +++ b/src/storage/sqlite/build-helpers.js @@ -9,8 +9,14 @@ import { normalizeFilePath } from './utils.js'; */ export function buildChunkRow(chunk, mode, id) { const tokensArray = Array.isArray(chunk.tokens) ? chunk.tokens : []; + const chunkId = chunk?.metaV2?.chunkId || chunk?.chunkId || null; + const signature = typeof chunk.docmeta?.signature === 'string' + ? chunk.docmeta.signature + : (typeof chunk.signature === 'string' ? chunk.signature : null); + const doc = typeof chunk.docmeta?.doc === 'string' ? 
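
// Decision sketch for resolveBackendPolicy (above): forced backends fail fast
// with an error string when their index is missing, while 'auto' applies the
// chunk/byte thresholds. Numbers are illustrative; import path assumed
// relative to the repo root.
import { resolveBackendPolicy } from './src/storage/backend-policy.js';

const decision = resolveBackendPolicy({
  backendArg: 'auto',
  sqliteConfigured: true,
  sqliteAvailable: true,
  sqliteAutoChunkThreshold: 50000,
  chunkCounts: [12000, 3400]
});
// decision.useSqlite === false  (reason: 'auto threshold not met', 12000 < 50000)
// decision.backendLabel === 'memory'
// decision.policy.maxChunkCount === 12000

const forced = resolveBackendPolicy({ backendArg: 'lmdb', lmdbAvailable: false });
// forced.error === 'LMDB backend requested but index not found'
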
chunk.docmeta.doc : null; return { id, + chunk_id: chunkId, mode, file: normalizeFilePath(chunk.file), start: chunk.start, @@ -20,7 +26,9 @@ export function buildChunkRow(chunk, mode, id) { ext: chunk.ext || null, kind: chunk.kind || null, name: chunk.name || null, + signature, headline: chunk.headline || null, + doc, preContext: chunk.preContext ? JSON.stringify(chunk.preContext) : null, postContext: chunk.postContext ? JSON.stringify(chunk.postContext) : null, weight: typeof chunk.weight === 'number' ? chunk.weight : 1, diff --git a/src/storage/sqlite/build/bundle-loader.js b/src/storage/sqlite/build/bundle-loader.js new file mode 100644 index 000000000..d1a4ed7a9 --- /dev/null +++ b/src/storage/sqlite/build/bundle-loader.js @@ -0,0 +1,46 @@ +import fsSync from 'node:fs'; +import path from 'node:path'; +import Piscina from 'piscina'; +import { readBundleFile } from '../../../shared/bundle-io.js'; + +export const createBundleLoader = ({ bundleThreads, workerPath }) => { + const useWorkers = Number.isFinite(bundleThreads) && bundleThreads > 1; + const pool = useWorkers && workerPath + ? new Piscina({ filename: workerPath, maxThreads: bundleThreads }) + : null; + + const loadBundle = async ({ bundleDir, entry, file }) => { + const bundleName = entry?.bundle; + if (!bundleName) { + return { file, ok: false, reason: 'missing bundle entry' }; + } + const bundlePath = path.join(bundleDir, bundleName); + if (!fsSync.existsSync(bundlePath)) { + return { file, ok: false, reason: 'bundle file missing' }; + } + try { + if (pool) { + const result = await pool.run({ bundlePath }); + if (!result?.ok) { + return { file, ok: false, reason: result?.reason || 'invalid bundle' }; + } + return { file, ok: true, bundle: result.bundle }; + } + const result = await readBundleFile(bundlePath); + if (!result.ok) { + return { file, ok: false, reason: result.reason || 'invalid bundle' }; + } + return { file, ok: true, bundle: result.bundle }; + } catch (err) { + return { file, ok: false, reason: err?.message || String(err) }; + } + }; + + const close = async () => { + if (pool) { + await pool.destroy(); + } + }; + + return { loadBundle, close, useWorkers }; +}; diff --git a/src/storage/sqlite/build/delete.js b/src/storage/sqlite/build/delete.js new file mode 100644 index 000000000..513f86b78 --- /dev/null +++ b/src/storage/sqlite/build/delete.js @@ -0,0 +1,45 @@ +import { chunkArray } from '../utils.js'; + +export function deleteDocIds(db, mode, docIds, extraTables = []) { + if (!docIds.length) return; + const deleteTargets = [ + { table: 'chunks', column: 'id' }, + { table: 'chunks_fts', column: 'rowid' }, + { table: 'token_postings', column: 'doc_id' }, + { table: 'phrase_postings', column: 'doc_id' }, + { table: 'chargram_postings', column: 'doc_id' }, + { table: 'minhash_signatures', column: 'doc_id' }, + { table: 'dense_vectors', column: 'doc_id' }, + { table: 'doc_lengths', column: 'doc_id' } + ]; + for (const extra of extraTables) { + if (extra?.table && extra?.column) deleteTargets.push(extra); + } + for (const chunk of chunkArray(docIds)) { + const placeholders = chunk.map(() => '?').join(','); + for (const target of deleteTargets) { + const withMode = target.withMode !== false; + const values = target.transform ? chunk.map(target.transform) : chunk; + const where = withMode + ? `mode = ? 
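
// Usage sketch for createBundleLoader (above): with bundleThreads > 1 and a
// worker script, bundle parsing fans out to a Piscina pool; otherwise it
// reads inline on the main thread. The worker path and directories below are
// illustrative assumptions.
import { createBundleLoader } from './src/storage/sqlite/build/bundle-loader.js';

const loader = createBundleLoader({
  bundleThreads: 4,
  workerPath: '/repo/scripts/bundle-worker.js' // assumed worker script
});
const result = await loader.loadBundle({
  bundleDir: '/tmp/index/bundles',
  entry: { bundle: 'src__a.js.json' },
  file: 'src/a.js'
});
if (!result.ok) console.warn(`skip ${result.file}: ${result.reason}`);
await loader.close(); // drains the pool when workers were used
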
AND ${target.column} IN (${placeholders})` + : `${target.column} IN (${placeholders})`; + const stmt = db.prepare(`DELETE FROM ${target.table} WHERE ${where}`); + if (withMode) { + stmt.run(mode, ...values); + } else { + stmt.run(...values); + } + } + } +} + +export function updateTokenStats(db, mode, insertTokenStats) { + const row = db.prepare( + 'SELECT COUNT(*) AS total_docs, AVG(len) AS avg_doc_len FROM doc_lengths WHERE mode = ?' + ).get(mode) || {}; + insertTokenStats.run( + mode, + typeof row.avg_doc_len === 'number' ? row.avg_doc_len : 0, + typeof row.total_docs === 'number' ? row.total_docs : 0 + ); +} diff --git a/src/storage/sqlite/build/from-artifacts.js b/src/storage/sqlite/build/from-artifacts.js new file mode 100644 index 000000000..0db127f19 --- /dev/null +++ b/src/storage/sqlite/build/from-artifacts.js @@ -0,0 +1,557 @@ +import fsSync from 'node:fs'; +import path from 'node:path'; +import readline from 'node:readline'; +import { + buildChunkRow, + buildTokenFrequency, + prepareVectorAnnTable +} from '../build-helpers.js'; +import { CREATE_INDEXES_SQL, CREATE_TABLES_BASE_SQL, SCHEMA_VERSION } from '../schema.js'; +import { normalizeFilePath, readJson, loadOptional } from '../utils.js'; +import { packUint32, packUint8, dequantizeUint8ToFloat32, toVectorId } from '../vector.js'; +import { applyBuildPragmas, restoreBuildPragmas } from './pragmas.js'; +import { normalizeManifestFiles } from './manifest.js'; +import { validateSqliteDatabase } from './validate.js'; +import { createInsertStatements } from './statements.js'; + +const listShardFiles = (dir, prefix) => { + if (!fsSync.existsSync(dir)) return []; + return fsSync + .readdirSync(dir) + .filter((name) => name.startsWith(prefix) && (name.endsWith('.json') || name.endsWith('.jsonl'))) + .sort() + .map((name) => path.join(dir, name)); +}; + +const resolveChunkMetaSources = (dir) => { + const metaPath = path.join(dir, 'chunk_meta.meta.json'); + const partsDir = path.join(dir, 'chunk_meta.parts'); + if (fsSync.existsSync(metaPath) || fsSync.existsSync(partsDir)) { + let parts = []; + if (fsSync.existsSync(metaPath)) { + try { + const meta = readJson(metaPath); + if (Array.isArray(meta?.parts) && meta.parts.length) { + parts = meta.parts.map((name) => path.join(dir, name)); + } + } catch {} + } + if (!parts.length) { + parts = listShardFiles(partsDir, 'chunk_meta.part-'); + } + return parts.length ? { format: 'jsonl', paths: parts } : null; + } + const jsonlPath = path.join(dir, 'chunk_meta.jsonl'); + if (fsSync.existsSync(jsonlPath)) { + return { format: 'jsonl', paths: [jsonlPath] }; + } + const jsonPath = path.join(dir, 'chunk_meta.json'); + if (fsSync.existsSync(jsonPath)) { + return { format: 'json', paths: [jsonPath] }; + } + return null; +}; + +const resolveTokenPostingsSources = (dir) => { + const metaPath = path.join(dir, 'token_postings.meta.json'); + const shardsDir = path.join(dir, 'token_postings.shards'); + if (!fsSync.existsSync(metaPath) && !fsSync.existsSync(shardsDir)) return null; + let parts = []; + if (fsSync.existsSync(metaPath)) { + try { + const meta = readJson(metaPath); + if (Array.isArray(meta?.parts) && meta.parts.length) { + parts = meta.parts.map((name) => path.join(dir, name)); + } + } catch {} + } + if (!parts.length) { + parts = listShardFiles(shardsDir, 'token_postings.part-'); + } + return parts.length ? 
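
// The chunked DELETE above batches doc ids (chunkArray defaults to 900 per
// statement) to stay under SQLite's classic 999 bound-parameter limit. A
// condensed sketch of the placeholder pattern for one batch; the source also
// supports extra tables without a mode column and per-id transforms.
function deleteBatch(db, table, column, mode, ids) {
  const placeholders = ids.map(() => '?').join(',');
  db.prepare(`DELETE FROM ${table} WHERE mode = ? AND ${column} IN (${placeholders})`)
    .run(mode, ...ids);
}
// deleteBatch(db, 'token_postings', 'doc_id', 'code', [1, 2, 3]);
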
{ metaPath, parts } : null; +}; + +const readJsonLinesFile = async (filePath, onEntry) => { + const stream = fsSync.createReadStream(filePath, { encoding: 'utf8' }); + const rl = readline.createInterface({ input: stream, crlfDelay: Infinity }); + for await (const line of rl) { + const trimmed = line.trim(); + if (!trimmed) continue; + onEntry(JSON.parse(trimmed)); + } +}; + +export const loadIndexPieces = (dir, modelId) => { + const sources = resolveChunkMetaSources(dir); + if (!sources) return null; + const denseVec = loadOptional(dir, 'dense_vectors_uint8.json'); + if (denseVec && !denseVec.model) denseVec.model = modelId || null; + return { + chunkMeta: null, + dir, + fileMeta: loadOptional(dir, 'file_meta.json'), + denseVec, + phraseNgrams: loadOptional(dir, 'phrase_ngrams.json'), + chargrams: loadOptional(dir, 'chargram_postings.json'), + minhash: loadOptional(dir, 'minhash_signatures.json'), + tokenPostings: null + }; +}; + +export async function buildDatabaseFromArtifacts({ + Database, + outPath, + index, + indexDir, + mode, + manifestFiles, + emitOutput, + validateMode, + vectorConfig, + modelConfig +}) { + if (!index) return 0; + const manifestLookup = normalizeManifestFiles(manifestFiles || {}); + if (emitOutput && manifestLookup.conflicts.length) { + console.warn(`[sqlite] Manifest path conflicts for ${mode}; using normalized entries.`); + } + const manifestByNormalized = manifestLookup.map; + const validationStats = { chunks: 0, dense: 0, minhash: 0 }; + const vectorExtension = vectorConfig?.extension || {}; + const encodeVector = vectorConfig?.encodeVector; + + const db = new Database(outPath); + applyBuildPragmas(db); + + let count = 0; + let succeeded = false; + try { + db.exec(CREATE_TABLES_BASE_SQL); + db.pragma(`user_version = ${SCHEMA_VERSION}`); + const vectorAnn = prepareVectorAnnTable({ db, indexData: index, mode, vectorConfig }); + + const statements = createInsertStatements(db); + const { + insertChunk, + insertFts, + insertTokenVocab, + insertTokenPosting, + insertDocLength, + insertTokenStats, + insertPhraseVocab, + insertPhrasePosting, + insertChargramVocab, + insertChargramPosting, + insertMinhash, + insertDense, + insertDenseMeta, + insertFileManifest + } = statements; + + function ingestTokenIndex(tokenIndex, targetMode) { + if (!tokenIndex?.vocab || !tokenIndex?.postings) return; + const vocab = tokenIndex.vocab; + const postings = tokenIndex.postings; + const docLengths = Array.isArray(tokenIndex.docLengths) ? tokenIndex.docLengths : []; + const avgDocLen = typeof tokenIndex.avgDocLen === 'number' ? tokenIndex.avgDocLen : null; + const totalDocs = typeof tokenIndex.totalDocs === 'number' ? 
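
// Streaming sketch matching readJsonLinesFile above: shard parts are read
// line by line, so chunk metadata never has to fit in memory at once.
import fsSync from 'node:fs';
import readline from 'node:readline';

async function countJsonl(filePath) {
  const rl = readline.createInterface({
    input: fsSync.createReadStream(filePath, { encoding: 'utf8' }),
    crlfDelay: Infinity
  });
  let rows = 0;
  for await (const line of rl) {
    if (!line.trim()) continue; // skip blank lines, as the original does
    JSON.parse(line); // throws on a corrupt shard line, like the original
    rows += 1;
  }
  return rows;
}
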
tokenIndex.totalDocs : docLengths.length; + + const insertVocabTx = db.transaction(() => { + for (let i = 0; i < vocab.length; i += 1) { + insertTokenVocab.run(targetMode, i, vocab[i]); + } + }); + insertVocabTx(); + + const insertPostingsTx = db.transaction(() => { + for (let tokenId = 0; tokenId < postings.length; tokenId += 1) { + const posting = postings[tokenId] || []; + for (const entry of posting) { + if (!entry) continue; + const docId = entry[0]; + const tf = entry[1]; + insertTokenPosting.run(targetMode, tokenId, docId, tf); + } + } + }); + insertPostingsTx(); + + const insertLengthsTx = db.transaction(() => { + for (let docId = 0; docId < docLengths.length; docId += 1) { + insertDocLength.run(targetMode, docId, docLengths[docId]); + } + }); + insertLengthsTx(); + + insertTokenStats.run(targetMode, avgDocLen, totalDocs); + } + + function ingestTokenIndexFromPieces(targetMode, indexDir) { + const directPath = path.join(indexDir, 'token_postings.json'); + const directPathGz = `${directPath}.gz`; + const sources = resolveTokenPostingsSources(indexDir); + if (!sources && !fsSync.existsSync(directPath) && !fsSync.existsSync(directPathGz)) { + return false; + } + if (!sources) { + const tokenIndex = readJson(directPath); + ingestTokenIndex(tokenIndex, targetMode); + return true; + } + const meta = fsSync.existsSync(sources.metaPath) ? readJson(sources.metaPath) : {}; + const docLengths = Array.isArray(meta?.docLengths) + ? meta.docLengths + : (Array.isArray(meta?.arrays?.docLengths) ? meta.arrays.docLengths : []); + const totalDocs = Number.isFinite(meta?.totalDocs) ? meta.totalDocs : docLengths.length; + const avgDocLen = Number.isFinite(meta?.avgDocLen) + ? meta.avgDocLen + : (Number.isFinite(meta?.fields?.avgDocLen) ? meta.fields.avgDocLen : ( + docLengths.length + ? docLengths.reduce((sum, len) => sum + (Number.isFinite(len) ? len : 0), 0) / docLengths.length + : 0 + )); + const insertLengthsTx = db.transaction(() => { + for (let docId = 0; docId < docLengths.length; docId += 1) { + insertDocLength.run(targetMode, docId, docLengths[docId]); + } + }); + insertLengthsTx(); + insertTokenStats.run(targetMode, avgDocLen, totalDocs); + let tokenId = 0; + for (const shardPath of sources.parts) { + const shard = readJson(shardPath); + const vocab = Array.isArray(shard?.vocab) + ? shard.vocab + : (Array.isArray(shard?.arrays?.vocab) ? shard.arrays.vocab : []); + const postings = Array.isArray(shard?.postings) + ? shard.postings + : (Array.isArray(shard?.arrays?.postings) ? shard.arrays.postings : []); + const insertVocabTx = db.transaction(() => { + for (let i = 0; i < vocab.length; i += 1) { + insertTokenVocab.run(targetMode, tokenId + i, vocab[i]); + } + }); + insertVocabTx(); + const insertPostingsTx = db.transaction(() => { + for (let i = 0; i < postings.length; i += 1) { + const posting = postings[i] || []; + const postingTokenId = tokenId + i; + for (const entry of posting) { + if (!entry) continue; + insertTokenPosting.run(targetMode, postingTokenId, entry[0], entry[1]); + } + } + }); + insertPostingsTx(); + tokenId += vocab.length; + } + return true; + } + + function ingestTokenIndexFromChunks(chunks, targetMode) { + if (!Array.isArray(chunks) || !chunks.length) return; + const tokenIdMap = new Map(); + let nextTokenId = 0; + let totalDocs = 0; + let totalLen = 0; + const insertTx = db.transaction(() => { + for (let i = 0; i < chunks.length; i += 1) { + const chunk = chunks[i]; + if (!chunk) continue; + const docId = Number.isFinite(chunk.id) ? 
chunk.id : i; + const tokensArray = Array.isArray(chunk.tokens) ? chunk.tokens : []; + const docLen = tokensArray.length; + totalDocs += 1; + totalLen += docLen; + insertDocLength.run(targetMode, docId, docLen); + if (!docLen) continue; + const freq = buildTokenFrequency(tokensArray); + for (const [token, tf] of freq.entries()) { + let tokenId = tokenIdMap.get(token); + if (tokenId === undefined) { + tokenId = nextTokenId; + nextTokenId += 1; + tokenIdMap.set(token, tokenId); + insertTokenVocab.run(targetMode, tokenId, token); + } + insertTokenPosting.run(targetMode, tokenId, docId, tf); + } + } + }); + insertTx(); + insertTokenStats.run(targetMode, totalDocs ? totalLen / totalDocs : 0, totalDocs); + } + + function ingestPostingIndex(indexData, targetMode, insertVocabStmt, insertPostingStmt) { + if (!indexData?.vocab || !indexData?.postings) return; + const vocab = indexData.vocab; + const postings = indexData.postings; + + const insertVocabTx = db.transaction(() => { + for (let i = 0; i < vocab.length; i += 1) { + insertVocabStmt.run(targetMode, i, vocab[i]); + } + }); + insertVocabTx(); + + const insertPostingsTx = db.transaction(() => { + for (let tokenId = 0; tokenId < postings.length; tokenId += 1) { + const posting = postings[tokenId] || []; + for (const docId of posting) { + insertPostingStmt.run(targetMode, tokenId, docId); + } + } + }); + insertPostingsTx(); + } + + function ingestMinhash(minhash, targetMode) { + if (!minhash?.signatures || !minhash.signatures.length) return; + const insertTx = db.transaction(() => { + for (let docId = 0; docId < minhash.signatures.length; docId += 1) { + const sig = minhash.signatures[docId]; + if (!sig) continue; + insertMinhash.run(targetMode, docId, packUint32(sig)); + validationStats.minhash += 1; + } + }); + insertTx(); + } + + function ingestDense(dense, targetMode) { + if (!dense?.vectors || !dense.vectors.length) return; + insertDenseMeta.run( + targetMode, + dense.dims || null, + typeof dense.scale === 'number' ? dense.scale : 1.0, + dense.model || modelConfig.id || null + ); + const insertTx = db.transaction(() => { + for (let docId = 0; docId < dense.vectors.length; docId += 1) { + const vec = dense.vectors[docId]; + if (!vec) continue; + insertDense.run(targetMode, docId, packUint8(vec)); + validationStats.dense += 1; + if (vectorAnn?.insert && encodeVector) { + const floatVec = dequantizeUint8ToFloat32(vec); + const encoded = encodeVector(floatVec, vectorExtension); + if (encoded) vectorAnn.insert.run(toVectorId(docId), encoded); + } + } + }); + insertTx(); + } + + const buildChunkRowWithMeta = (chunk, targetMode, fileMetaById) => { + const fileMeta = Number.isFinite(chunk.fileId) + ? fileMetaById.get(chunk.fileId) + : null; + const resolvedFile = normalizeFilePath(chunk.file || fileMeta?.file); + const resolvedExt = chunk.ext || fileMeta?.ext || null; + const resolvedExternalDocs = chunk.externalDocs || fileMeta?.externalDocs || null; + const resolvedLastModified = chunk.last_modified || fileMeta?.last_modified || null; + const resolvedLastAuthor = chunk.last_author || fileMeta?.last_author || null; + const resolvedChurn = typeof chunk.churn === 'number' + ? chunk.churn + : (typeof fileMeta?.churn === 'number' ? fileMeta.churn : null); + const resolvedChurnAdded = typeof chunk.churn_added === 'number' + ? chunk.churn_added + : (typeof fileMeta?.churn_added === 'number' ? fileMeta.churn_added : null); + const resolvedChurnDeleted = typeof chunk.churn_deleted === 'number' + ? 
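
// buildTokenFrequency (imported above, defined elsewhere in this repo) is
// assumed here to fold a token array into a Map of token -> term frequency;
// the rebuild path then assigns dense integer token ids on first sight. A
// self-contained sketch of that assumed fold:
function tokenFrequencySketch(tokens) {
  const freq = new Map();
  for (const token of tokens) {
    freq.set(token, (freq.get(token) || 0) + 1);
  }
  return freq;
}
// tokenFrequencySketch(['a', 'b', 'a']) -> Map { 'a' => 2, 'b' => 1 }
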
chunk.churn_deleted + : (typeof fileMeta?.churn_deleted === 'number' ? fileMeta.churn_deleted : null); + const resolvedChurnCommits = typeof chunk.churn_commits === 'number' + ? chunk.churn_commits + : (typeof fileMeta?.churn_commits === 'number' ? fileMeta.churn_commits : null); + const tokensArray = Array.isArray(chunk.tokens) ? chunk.tokens : []; + const tokensText = tokensArray.join(' '); + const signatureText = typeof chunk.docmeta?.signature === 'string' + ? chunk.docmeta.signature + : (typeof chunk.signature === 'string' ? chunk.signature : null); + const docText = typeof chunk.docmeta?.doc === 'string' ? chunk.docmeta.doc : null; + const stableChunkId = chunk?.metaV2?.chunkId || chunk?.chunkId || null; + return { + id: Number.isFinite(chunk.id) ? chunk.id : null, + chunk_id: stableChunkId, + mode: targetMode, + file: resolvedFile, + start: chunk.start, + end: chunk.end, + startLine: chunk.startLine || null, + endLine: chunk.endLine || null, + ext: resolvedExt, + kind: chunk.kind || null, + name: chunk.name || null, + signature: signatureText, + headline: chunk.headline || null, + doc: docText, + preContext: chunk.preContext ? JSON.stringify(chunk.preContext) : null, + postContext: chunk.postContext ? JSON.stringify(chunk.postContext) : null, + weight: typeof chunk.weight === 'number' ? chunk.weight : 1, + tokens: tokensArray.length ? JSON.stringify(tokensArray) : null, + tokensText, + ngrams: chunk.ngrams ? JSON.stringify(chunk.ngrams) : null, + codeRelations: chunk.codeRelations ? JSON.stringify(chunk.codeRelations) : null, + docmeta: chunk.docmeta ? JSON.stringify(chunk.docmeta) : null, + stats: chunk.stats ? JSON.stringify(chunk.stats) : null, + complexity: chunk.complexity ? JSON.stringify(chunk.complexity) : null, + lint: chunk.lint ? JSON.stringify(chunk.lint) : null, + externalDocs: resolvedExternalDocs ? JSON.stringify(resolvedExternalDocs) : null, + last_modified: resolvedLastModified, + last_author: resolvedLastAuthor, + churn: resolvedChurn, + churn_added: resolvedChurnAdded, + churn_deleted: resolvedChurnDeleted, + churn_commits: resolvedChurnCommits, + chunk_authors: chunk.chunk_authors ? 
JSON.stringify(chunk.chunk_authors) : null + }; + }; + + const ingestChunkMetaPieces = async (targetMode, indexDir, fileMetaById) => { + const sources = resolveChunkMetaSources(indexDir); + if (!sources) return { count: 0, fileCounts: new Map() }; + const fileCounts = new Map(); + const rows = []; + const insert = db.transaction((batch) => { + for (const row of batch) { + insertChunk.run(row); + insertFts.run(row); + } + }); + const flush = () => { + if (!rows.length) return; + insert(rows); + rows.length = 0; + }; + let chunkCount = 0; + const handleChunk = (chunk) => { + if (!chunk) return; + if (!Number.isFinite(chunk.id)) { + chunk.id = chunkCount; + } + const row = buildChunkRowWithMeta(chunk, targetMode, fileMetaById); + if (row.file) { + fileCounts.set(row.file, (fileCounts.get(row.file) || 0) + 1); + } + rows.push(row); + chunkCount += 1; + if (rows.length >= 500) flush(); + }; + if (sources.format === 'json') { + const data = readJson(sources.paths[0]); + if (Array.isArray(data)) { + for (const chunk of data) handleChunk(chunk); + } + } else { + for (const chunkPath of sources.paths) { + await readJsonLinesFile(chunkPath, handleChunk); + } + } + flush(); + return { count: chunkCount, fileCounts }; + }; + + async function ingestIndex(indexData, targetMode, indexDir) { + if (!indexData && !indexDir) return 0; + const fileMetaById = new Map(); + if (Array.isArray(indexData?.fileMeta)) { + for (const entry of indexData.fileMeta) { + if (!entry || !Number.isFinite(entry.id)) continue; + fileMetaById.set(entry.id, entry); + } + } + let chunkCount = 0; + let fileCounts = new Map(); + if (Array.isArray(indexData?.chunkMeta)) { + const insert = db.transaction((rows) => { + for (const row of rows) { + insertChunk.run(row); + insertFts.run(row); + } + }); + const rows = []; + for (let i = 0; i < indexData.chunkMeta.length; i += 1) { + const chunk = indexData.chunkMeta[i]; + if (!chunk) continue; + if (!Number.isFinite(chunk.id)) { + chunk.id = i; + } + const row = buildChunkRowWithMeta(chunk, targetMode, fileMetaById); + rows.push(row); + if (row.file) { + fileCounts.set(row.file, (fileCounts.get(row.file) || 0) + 1); + } + chunkCount += 1; + } + insert(rows); + } else if (indexDir) { + const result = await ingestChunkMetaPieces(targetMode, indexDir, fileMetaById); + chunkCount = result.count; + fileCounts = result.fileCounts; + } + + let tokenIngested = false; + if (indexData?.tokenPostings) { + ingestTokenIndex(indexData.tokenPostings, targetMode); + tokenIngested = true; + } + if (!tokenIngested && indexDir) { + tokenIngested = ingestTokenIndexFromPieces(targetMode, indexDir); + } + if (!tokenIngested) { + console.warn(`[sqlite] token_postings missing; rebuilding tokens for ${targetMode}.`); + if (Array.isArray(indexData?.chunkMeta)) { + ingestTokenIndexFromChunks(indexData.chunkMeta, targetMode); + } else { + console.warn(`[sqlite] chunk_meta unavailable for token rebuild (${targetMode}).`); + } + } + + ingestPostingIndex(indexData?.phraseNgrams, targetMode, insertPhraseVocab, insertPhrasePosting); + ingestPostingIndex(indexData?.chargrams, targetMode, insertChargramVocab, insertChargramPosting); + ingestMinhash(indexData?.minhash, targetMode); + ingestDense(indexData?.denseVec, targetMode); + ingestFileManifest(fileCounts, targetMode); + + return chunkCount; + } + + function ingestFileManifest(fileCounts, targetMode) { + if (!fileCounts || !fileCounts.size) return; + const insertTx = db.transaction(() => { + for (const [file, count] of fileCounts.entries()) { + const normalizedFile = 
normalizeFilePath(file); + const entry = manifestByNormalized.get(normalizedFile)?.entry || null; + insertFileManifest.run( + targetMode, + normalizedFile, + entry?.hash || null, + Number.isFinite(entry?.mtimeMs) ? entry.mtimeMs : null, + Number.isFinite(entry?.size) ? entry.size : null, + count + ); + } + }); + insertTx(); + } + + count = await ingestIndex(index, mode, indexDir); + validationStats.chunks = count; + db.exec(CREATE_INDEXES_SQL); + validateSqliteDatabase(db, mode, { + validateMode, + expected: validationStats, + emitOutput + }); + succeeded = true; + } finally { + restoreBuildPragmas(db); + db.close(); + if (!succeeded) { + try { + fsSync.rmSync(outPath, { force: true }); + } catch {} + } + } + return count; +} diff --git a/src/storage/sqlite/build/from-bundles.js b/src/storage/sqlite/build/from-bundles.js new file mode 100644 index 000000000..05e5a1df0 --- /dev/null +++ b/src/storage/sqlite/build/from-bundles.js @@ -0,0 +1,301 @@ +import fsSync from 'node:fs'; +import path from 'node:path'; +import { buildChunkRow, buildTokenFrequency } from '../build-helpers.js'; +import { CREATE_INDEXES_SQL, CREATE_TABLES_BASE_SQL, SCHEMA_VERSION } from '../schema.js'; +import { normalizeFilePath } from '../utils.js'; +import { packUint32, packUint8, quantizeVec, toVectorId } from '../vector.js'; +import { applyBuildPragmas, restoreBuildPragmas } from './pragmas.js'; +import { normalizeManifestFiles } from './manifest.js'; +import { validateSqliteDatabase } from './validate.js'; +import { createInsertStatements } from './statements.js'; +import { createBundleLoader } from './bundle-loader.js'; + +export async function buildDatabaseFromBundles({ + Database, + outPath, + mode, + incrementalData, + envConfig, + threadLimits, + emitOutput, + validateMode, + vectorConfig, + modelConfig, + workerPath +}) { + if (!incrementalData?.manifest) { + return { count: 0, reason: 'missing incremental manifest' }; + } + const manifestFiles = incrementalData.manifest.files || {}; + const manifestLookup = normalizeManifestFiles(manifestFiles); + const manifestEntries = manifestLookup.entries; + if (!manifestEntries.length) { + return { count: 0, reason: 'incremental manifest empty' }; + } + if (emitOutput && manifestLookup.conflicts.length) { + console.warn(`[sqlite] Manifest path conflicts for ${mode}; using normalized entries.`); + } + const totalFiles = manifestEntries.length; + let processedFiles = 0; + let lastProgressLog = 0; + const progressIntervalMs = 1000; + const envBundleThreads = Number(envConfig.bundleThreads); + const bundleThreads = Number.isFinite(envBundleThreads) && envBundleThreads > 0 + ? Math.floor(envBundleThreads) + : Math.max(1, Math.floor(threadLimits.fileConcurrency)); + const bundleLoader = createBundleLoader({ bundleThreads, workerPath }); + const useBundleWorkers = bundleLoader.useWorkers; + const logBundleProgress = (file, force = false) => { + if (!emitOutput) return; + const now = Date.now(); + if (!force && now - lastProgressLog < progressIntervalMs) return; + lastProgressLog = now; + const percent = ((processedFiles / totalFiles) * 100).toFixed(1); + const suffix = file ? 
` | ${file}` : ''; + console.log(`[sqlite] bundles ${processedFiles}/${totalFiles} (${percent}%)${suffix}`); + }; + if (emitOutput) { + console.log(`[sqlite] Using incremental bundles for ${mode} (${totalFiles} files).`); + if (useBundleWorkers) { + console.log(`[sqlite] Bundle parser workers: ${bundleThreads}.`); + } + } + + const db = new Database(outPath); + applyBuildPragmas(db); + db.exec(CREATE_TABLES_BASE_SQL); + db.pragma(`user_version = ${SCHEMA_VERSION}`); + let succeeded = false; + try { + const statements = createInsertStatements(db); + const { + insertChunk, + insertFts, + insertTokenVocab, + insertTokenPosting, + insertDocLength, + insertTokenStats, + insertPhraseVocab, + insertPhrasePosting, + insertChargramVocab, + insertChargramPosting, + insertMinhash, + insertDense, + insertDenseMeta, + insertFileManifest + } = statements; + + const tokenIdMap = new Map(); + const phraseIdMap = new Map(); + const chargramIdMap = new Map(); + let nextTokenId = 0; + let nextPhraseId = 0; + let nextChargramId = 0; + let nextDocId = 0; + let totalDocs = 0; + let totalLen = 0; + const validationStats = { chunks: 0, dense: 0, minhash: 0 }; + + const fileCounts = new Map(); + for (const record of manifestEntries) { + fileCounts.set(record.normalized, 0); + } + + const vectorExtension = vectorConfig?.extension || {}; + const vectorAnnEnabled = vectorConfig?.enabled === true; + const encodeVector = vectorConfig?.encodeVector; + let denseMetaSet = false; + let denseDims = null; + let denseWarned = false; + let vectorAnnLoaded = false; + let vectorAnnReady = false; + let vectorAnnTable = vectorExtension.table || 'dense_vectors_ann'; + let vectorAnnColumn = vectorExtension.column || 'embedding'; + let insertVectorAnn = null; + if (vectorAnnEnabled) { + const loadResult = vectorConfig.loadVectorExtension(db, vectorExtension, `sqlite ${mode}`); + if (loadResult.ok) { + vectorAnnLoaded = true; + if (vectorConfig.hasVectorTable(db, vectorAnnTable)) { + vectorAnnReady = true; + } + } else { + console.warn(`[sqlite] Vector extension unavailable for ${mode}: ${loadResult.reason}`); + } + } + + const insertBundle = db.transaction((bundle, fileKey) => { + const normalizedFile = normalizeFilePath(fileKey); + let chunkCount = 0; + for (const chunk of bundle.chunks || []) { + const docId = nextDocId; + nextDocId += 1; + + const row = buildChunkRow({ ...chunk, file: chunk.file || fileKey }, mode, docId); + insertChunk.run(row); + insertFts.run(row); + + const tokensArray = Array.isArray(chunk.tokens) ? 
chunk.tokens : []; + insertDocLength.run(mode, docId, tokensArray.length); + totalDocs += 1; + totalLen += tokensArray.length; + + if (tokensArray.length) { + const freq = buildTokenFrequency(tokensArray); + for (const [token, tf] of freq.entries()) { + let tokenId = tokenIdMap.get(token); + if (tokenId === undefined) { + tokenId = nextTokenId; + nextTokenId += 1; + tokenIdMap.set(token, tokenId); + insertTokenVocab.run(mode, tokenId, token); + } + insertTokenPosting.run(mode, tokenId, docId, tf); + } + } + + if (Array.isArray(chunk.ngrams)) { + const unique = new Set(chunk.ngrams); + for (const ng of unique) { + let phraseId = phraseIdMap.get(ng); + if (phraseId === undefined) { + phraseId = nextPhraseId; + nextPhraseId += 1; + phraseIdMap.set(ng, phraseId); + insertPhraseVocab.run(mode, phraseId, ng); + } + insertPhrasePosting.run(mode, phraseId, docId); + } + } + + if (Array.isArray(chunk.chargrams)) { + const unique = new Set(chunk.chargrams); + for (const gram of unique) { + let gramId = chargramIdMap.get(gram); + if (gramId === undefined) { + gramId = nextChargramId; + nextChargramId += 1; + chargramIdMap.set(gram, gramId); + insertChargramVocab.run(mode, gramId, gram); + } + insertChargramPosting.run(mode, gramId, docId); + } + } + + if (Array.isArray(chunk.minhashSig) && chunk.minhashSig.length) { + insertMinhash.run(mode, docId, packUint32(chunk.minhashSig)); + validationStats.minhash += 1; + } + + if (Array.isArray(chunk.embedding) && chunk.embedding.length) { + const dims = chunk.embedding.length; + if (!denseMetaSet) { + insertDenseMeta.run(mode, dims, 1.0, modelConfig.id || null); + denseMetaSet = true; + denseDims = dims; + } else if (denseDims !== null && dims !== denseDims && !denseWarned) { + console.warn(`Dense vector dims mismatch for ${mode}: expected ${denseDims}, got ${dims}`); + denseWarned = true; + } + insertDense.run(mode, docId, packUint8(quantizeVec(chunk.embedding))); + validationStats.dense += 1; + if (vectorAnnLoaded) { + if (!vectorAnnReady) { + const created = vectorConfig.ensureVectorTable(db, vectorExtension, dims); + if (created.ok) { + vectorAnnReady = true; + vectorAnnTable = created.tableName; + vectorAnnColumn = created.column; + insertVectorAnn = db.prepare( + `INSERT OR REPLACE INTO ${vectorAnnTable} (rowid, ${vectorAnnColumn}) VALUES (?, ?)` + ); + } + } + if (vectorAnnReady && insertVectorAnn && encodeVector) { + const encoded = encodeVector(chunk.embedding, vectorExtension); + if (encoded) insertVectorAnn.run(toVectorId(docId), encoded); + } + } + } + + chunkCount += 1; + } + + fileCounts.set(normalizedFile, (fileCounts.get(normalizedFile) || 0) + chunkCount); + }); + + let count = 0; + let bundleFailure = null; + const batchSize = useBundleWorkers + ? 
Math.max(1, Math.min(totalFiles, Math.max(1, bundleThreads * 2))) + : 1; + try { + for (let i = 0; i < manifestEntries.length; i += batchSize) { + const batch = manifestEntries.slice(i, i + batchSize); + const tasks = batch.map((record) => bundleLoader.loadBundle({ + bundleDir: incrementalData.bundleDir, + entry: record.entry, + file: record.file + })); + const results = await Promise.all(tasks); + const failure = results.find((result) => !result.ok); + if (failure) { + bundleFailure = `${failure.reason} for ${failure.file}`; + break; + } + for (const result of results) { + insertBundle(result.bundle, result.file); + count += result.bundle.chunks.length; + processedFiles += 1; + logBundleProgress(result.file, processedFiles === totalFiles); + } + if (bundleFailure) break; + } + } finally { + await bundleLoader.close(); + } + + if (bundleFailure) { + if (emitOutput) { + console.warn(`[sqlite] Bundle build failed for ${mode}: ${bundleFailure}.`); + } + return { count: 0, reason: bundleFailure }; + } + + validationStats.chunks = count; + insertTokenStats.run(mode, totalDocs ? totalLen / totalDocs : 0, totalDocs); + + const insertManifestTx = db.transaction(() => { + for (const [file, chunkCount] of fileCounts.entries()) { + const normalizedFile = normalizeFilePath(file); + const entry = manifestLookup.map.get(normalizedFile)?.entry || null; + insertFileManifest.run( + mode, + normalizedFile, + entry?.hash || null, + Number.isFinite(entry?.mtimeMs) ? entry.mtimeMs : null, + Number.isFinite(entry?.size) ? entry.size : null, + chunkCount + ); + } + }); + insertManifestTx(); + + db.exec(CREATE_INDEXES_SQL); + validateSqliteDatabase(db, mode, { + validateMode, + expected: validationStats, + emitOutput + }); + succeeded = true; + return { count }; + } finally { + restoreBuildPragmas(db); + db.close(); + if (!succeeded) { + try { + fsSync.rmSync(outPath, { force: true }); + } catch {} + } + } +} diff --git a/src/storage/sqlite/build/incremental-update.js b/src/storage/sqlite/build/incremental-update.js new file mode 100644 index 000000000..7be740463 --- /dev/null +++ b/src/storage/sqlite/build/incremental-update.js @@ -0,0 +1,436 @@ +import fsSync from 'node:fs'; +import path from 'node:path'; +import { readBundleFile } from '../../../shared/bundle-io.js'; +import { buildChunkRow, buildTokenFrequency } from '../build-helpers.js'; +import { REQUIRED_TABLES, SCHEMA_VERSION } from '../schema.js'; +import { hasRequiredTables, normalizeFilePath } from '../utils.js'; +import { packUint32, packUint8, quantizeVec, toVectorId } from '../vector.js'; +import { deleteDocIds, updateTokenStats } from './delete.js'; +import { diffFileManifests, getFileManifest, normalizeManifestFiles } from './manifest.js'; +import { createInsertStatements } from './statements.js'; +import { getSchemaVersion, validateSqliteDatabase } from './validate.js'; +import { ensureVocabIds } from './vocab.js'; + +const MAX_INCREMENTAL_CHANGE_RATIO = 0.35; +const VOCAB_GROWTH_LIMITS = { + token_vocab: { ratio: 0.4, absolute: 200000 }, + phrase_vocab: { ratio: 0.5, absolute: 150000 }, + chargram_vocab: { ratio: 1.0, absolute: 250000 } +}; + +class IncrementalSkipError extends Error { + constructor(reason) { + super(reason); + this.reason = reason; + } +} + +export async function incrementalUpdateDatabase({ + Database, + outPath, + mode, + incrementalData, + modelConfig, + vectorConfig, + emitOutput, + validateMode, + expectedDense +}) { + if (!incrementalData?.manifest) { + return { used: false, reason: 'missing incremental manifest' }; + } + 
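+  // Note: each bail-out below returns { used: false, reason } instead of
+  // throwing, so callers can log the reason and fall back to a full rebuild.
+  // The checks run roughly cheapest-first: db presence, schema version and
+  // required tables, dense model/dims, manifest shape and change ratio, and
+  // finally per-file bundle reads.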
if (!fsSync.existsSync(outPath)) { + return { used: false, reason: 'sqlite db missing' }; + } + + const expectedModel = expectedDense?.model || modelConfig.id || null; + const expectedDims = Number.isFinite(expectedDense?.dims) ? expectedDense.dims : null; + + const db = new Database(outPath); + try { + db.pragma('journal_mode = WAL'); + db.pragma('synchronous = NORMAL'); + } catch {} + + const schemaVersion = getSchemaVersion(db); + if (schemaVersion !== SCHEMA_VERSION) { + db.close(); + return { + used: false, + reason: `schema mismatch (db=${schemaVersion ?? 'unknown'}, expected=${SCHEMA_VERSION})` + }; + } + + if (!hasRequiredTables(db, REQUIRED_TABLES)) { + db.close(); + return { used: false, reason: 'schema missing' }; + } + + const dbDenseMeta = db.prepare( + 'SELECT dims, scale, model FROM dense_meta WHERE mode = ?' + ).get(mode); + const dbDims = Number.isFinite(dbDenseMeta?.dims) ? dbDenseMeta.dims : null; + const dbModel = dbDenseMeta?.model || null; + if ((expectedModel || expectedDims !== null) && !dbDenseMeta) { + db.close(); + return { used: false, reason: 'dense metadata missing' }; + } + if (expectedModel) { + if (!dbModel) { + db.close(); + return { used: false, reason: 'dense metadata model missing' }; + } + if (dbModel !== expectedModel) { + db.close(); + return { used: false, reason: `model mismatch (db=${dbModel}, expected=${expectedModel})` }; + } + } + if (expectedDims !== null) { + if (dbDims === null) { + db.close(); + return { used: false, reason: 'dense metadata dims missing' }; + } + if (dbDims !== expectedDims) { + db.close(); + return { used: false, reason: `dense dims mismatch (db=${dbDims}, expected=${expectedDims})` }; + } + } + + const manifestFiles = incrementalData.manifest.files || {}; + const manifestLookup = normalizeManifestFiles(manifestFiles); + if (!manifestLookup.entries.length) { + db.close(); + return { used: false, reason: 'incremental manifest empty' }; + } + if (manifestLookup.conflicts.length) { + db.close(); + return { used: false, reason: 'manifest path conflicts' }; + } + + const dbFiles = getFileManifest(db, mode); + if (!dbFiles.size) { + const chunkRow = db.prepare('SELECT COUNT(*) AS total FROM chunks WHERE mode = ?') + .get(mode) || {}; + if (Number.isFinite(chunkRow.total) && chunkRow.total > 0) { + db.close(); + return { used: false, reason: 'file manifest empty' }; + } + } + + const { changed, deleted } = diffFileManifests(manifestLookup.entries, dbFiles); + const totalFiles = manifestLookup.entries.length; + if (totalFiles) { + const changeRatio = (changed.length + deleted.length) / totalFiles; + if (changeRatio > MAX_INCREMENTAL_CHANGE_RATIO) { + db.close(); + return { + used: false, + reason: `change ratio ${changeRatio.toFixed(2)} exceeds ${MAX_INCREMENTAL_CHANGE_RATIO}` + }; + } + } + if (!changed.length && !deleted.length) { + db.close(); + return { used: true, changedFiles: 0, deletedFiles: 0, insertedChunks: 0 }; + } + + const bundles = new Map(); + for (const record of changed) { + const fileKey = record.file; + const normalizedFile = record.normalized; + const entry = record.entry; + const bundleName = entry?.bundle; + if (!bundleName) { + db.close(); + return { used: false, reason: `missing bundle for ${fileKey}` }; + } + const bundlePath = path.join(incrementalData.bundleDir, bundleName); + if (!fsSync.existsSync(bundlePath)) { + db.close(); + return { used: false, reason: `bundle missing for ${fileKey}` }; + } + const result = await readBundleFile(bundlePath); + if (!result.ok) { + db.close(); + return { used: 
false, reason: `invalid bundle for ${fileKey}` }; + } + bundles.set(normalizedFile, { bundle: result.bundle, entry, fileKey, normalizedFile }); + } + + const tokenValues = []; + const phraseValues = []; + const chargramValues = []; + const incomingDimsSet = new Set(); + for (const bundleEntry of bundles.values()) { + const bundle = bundleEntry.bundle; + for (const chunk of bundle.chunks || []) { + const tokensArray = Array.isArray(chunk.tokens) ? chunk.tokens : []; + if (tokensArray.length) tokenValues.push(...tokensArray); + if (Array.isArray(chunk.ngrams)) phraseValues.push(...chunk.ngrams); + if (Array.isArray(chunk.chargrams)) chargramValues.push(...chunk.chargrams); + if (Array.isArray(chunk.embedding) && chunk.embedding.length) { + incomingDimsSet.add(chunk.embedding.length); + } + } + } + if (incomingDimsSet.size > 1) { + db.close(); + return { used: false, reason: 'embedding dims mismatch across bundles' }; + } + const incomingDims = incomingDimsSet.size ? [...incomingDimsSet][0] : null; + if (incomingDims !== null && dbDims !== null && incomingDims !== dbDims) { + db.close(); + return { used: false, reason: `embedding dims mismatch (db=${dbDims}, incoming=${incomingDims})` }; + } + if (incomingDims !== null && expectedDims !== null && incomingDims !== expectedDims) { + db.close(); + return { used: false, reason: `embedding dims mismatch (expected=${expectedDims}, incoming=${incomingDims})` }; + } + + const statements = createInsertStatements(db); + const { + insertChunk, + insertFts, + insertTokenVocab, + insertTokenPosting, + insertDocLength, + insertTokenStats, + insertPhraseVocab, + insertPhrasePosting, + insertChargramVocab, + insertChargramPosting, + insertMinhash, + insertDense, + insertDenseMeta, + insertFileManifest + } = statements; + + const existingIdsByFile = new Map(); + const fileRows = db.prepare('SELECT id, file FROM chunks WHERE mode = ? ORDER BY id') + .all(mode); + for (const row of fileRows) { + const normalized = normalizeFilePath(row.file); + const entry = existingIdsByFile.get(normalized) || { file: normalized, ids: [] }; + entry.ids.push(row.id); + existingIdsByFile.set(normalized, entry); + } + + const maxRow = db.prepare('SELECT MAX(id) AS maxId FROM chunks WHERE mode = ?') + .get(mode); + let nextDocId = Number.isFinite(maxRow?.maxId) ? maxRow.maxId + 1 : 0; + const freeDocIds = []; + let insertedChunks = 0; + + const vectorExtension = vectorConfig?.extension || {}; + const vectorAnnEnabled = vectorConfig?.enabled === true; + const encodeVector = vectorConfig?.encodeVector; + let denseMetaSet = false; + let denseDims = null; + let denseWarned = false; + let vectorAnnLoaded = false; + let vectorAnnReady = false; + let vectorAnnTable = vectorExtension.table || 'dense_vectors_ann'; + let vectorAnnColumn = vectorExtension.column || 'embedding'; + let insertVectorAnn = null; + if (vectorAnnEnabled) { + const loadResult = vectorConfig.loadVectorExtension(db, vectorExtension, `sqlite ${mode}`); + if (loadResult.ok) { + vectorAnnLoaded = true; + if (vectorConfig.hasVectorTable(db, vectorAnnTable)) { + vectorAnnReady = true; + } + } else if (emitOutput) { + console.warn(`[sqlite] Vector extension unavailable for ${mode}: ${loadResult.reason}`); + } + } + + const vectorDeleteTargets = vectorAnnLoaded && vectorAnnReady + ? 
[{ table: vectorAnnTable, column: 'rowid', withMode: false, transform: toVectorId }] + : []; + + const applyChanges = db.transaction(() => { + const tokenVocab = ensureVocabIds( + db, + mode, + 'token_vocab', + 'token_id', + 'token', + tokenValues, + insertTokenVocab, + { limits: VOCAB_GROWTH_LIMITS.token_vocab } + ); + if (tokenVocab.skip) { + throw new IncrementalSkipError(tokenVocab.reason || 'token vocab growth too large'); + } + + const phraseVocab = ensureVocabIds( + db, + mode, + 'phrase_vocab', + 'phrase_id', + 'ngram', + phraseValues, + insertPhraseVocab, + { limits: VOCAB_GROWTH_LIMITS.phrase_vocab } + ); + if (phraseVocab.skip) { + throw new IncrementalSkipError(phraseVocab.reason || 'phrase vocab growth too large'); + } + + const chargramVocab = ensureVocabIds( + db, + mode, + 'chargram_vocab', + 'gram_id', + 'gram', + chargramValues, + insertChargramVocab, + { limits: VOCAB_GROWTH_LIMITS.chargram_vocab } + ); + if (chargramVocab.skip) { + throw new IncrementalSkipError(chargramVocab.reason || 'chargram vocab growth too large'); + } + + const tokenIdMap = tokenVocab.map; + const phraseIdMap = phraseVocab.map; + const chargramIdMap = chargramVocab.map; + + for (const file of deleted) { + const normalizedFile = normalizeFilePath(file); + const entry = existingIdsByFile.get(normalizedFile); + const docIds = entry?.ids || []; + deleteDocIds(db, mode, docIds, vectorDeleteTargets); + db.prepare('DELETE FROM file_manifest WHERE mode = ? AND file = ?') + .run(mode, normalizedFile); + } + + for (const record of changed) { + const normalizedFile = record.normalized; + const entry = existingIdsByFile.get(normalizedFile); + const reuseIds = entry?.ids || []; + const docIds = reuseIds; + let reuseIndex = 0; + deleteDocIds(db, mode, docIds, vectorDeleteTargets); + + const bundleEntry = bundles.get(normalizedFile); + const bundle = bundleEntry?.bundle; + let chunkCount = 0; + for (const chunk of bundle?.chunks || []) { + let docId; + if (reuseIndex < reuseIds.length) { + docId = reuseIds[reuseIndex]; + reuseIndex += 1; + } else if (freeDocIds.length) { + docId = freeDocIds.pop(); + } else { + docId = nextDocId; + nextDocId += 1; + } + const row = buildChunkRow( + { ...chunk, file: chunk.file || normalizedFile }, + mode, + docId + ); + insertChunk.run(row); + insertFts.run(row); + + const tokensArray = Array.isArray(chunk.tokens) ? 
chunk.tokens : []; + insertDocLength.run(mode, docId, tokensArray.length); + const freq = buildTokenFrequency(tokensArray); + for (const [token, tf] of freq.entries()) { + const tokenId = tokenIdMap.get(token); + if (tokenId === undefined) continue; + insertTokenPosting.run(mode, tokenId, docId, tf); + } + + if (Array.isArray(chunk.ngrams)) { + const unique = new Set(chunk.ngrams); + for (const ng of unique) { + const phraseId = phraseIdMap.get(ng); + if (phraseId === undefined) continue; + insertPhrasePosting.run(mode, phraseId, docId); + } + } + + if (Array.isArray(chunk.chargrams)) { + const unique = new Set(chunk.chargrams); + for (const gram of unique) { + const gramId = chargramIdMap.get(gram); + if (gramId === undefined) continue; + insertChargramPosting.run(mode, gramId, docId); + } + } + + if (Array.isArray(chunk.minhashSig) && chunk.minhashSig.length) { + insertMinhash.run(mode, docId, packUint32(chunk.minhashSig)); + } + + if (Array.isArray(chunk.embedding) && chunk.embedding.length) { + const dims = chunk.embedding.length; + if (!denseMetaSet) { + insertDenseMeta.run(mode, dims, 1.0, modelConfig.id || null); + denseMetaSet = true; + denseDims = dims; + } else if (denseDims !== null && dims !== denseDims && !denseWarned) { + console.warn(`Dense vector dims mismatch for ${mode}: expected ${denseDims}, got ${dims}`); + denseWarned = true; + } + insertDense.run(mode, docId, packUint8(quantizeVec(chunk.embedding))); + if (vectorAnnLoaded) { + if (!vectorAnnReady) { + const created = vectorConfig.ensureVectorTable(db, vectorExtension, dims); + if (created.ok) { + vectorAnnReady = true; + vectorAnnTable = created.tableName; + vectorAnnColumn = created.column; + insertVectorAnn = db.prepare( + `INSERT OR REPLACE INTO ${vectorAnnTable} (rowid, ${vectorAnnColumn}) VALUES (?, ?)` + ); + } + } + if (vectorAnnReady && insertVectorAnn && encodeVector) { + const encoded = encodeVector(chunk.embedding, vectorExtension); + if (encoded) insertVectorAnn.run(toVectorId(docId), encoded); + } + } + } + + chunkCount += 1; + insertedChunks += 1; + } + if (reuseIndex < reuseIds.length) { + freeDocIds.push(...reuseIds.slice(reuseIndex)); + } + + const manifestEntry = record.entry || bundleEntry?.entry || {}; + insertFileManifest.run( + mode, + normalizedFile, + manifestEntry?.hash || null, + Number.isFinite(manifestEntry?.mtimeMs) ? manifestEntry.mtimeMs : null, + Number.isFinite(manifestEntry?.size) ? 
manifestEntry.size : null, + chunkCount + ); + } + + updateTokenStats(db, mode, insertTokenStats); + validateSqliteDatabase(db, mode, { validateMode, emitOutput }); + }); + + try { + applyChanges(); + } catch (err) { + db.close(); + if (err instanceof IncrementalSkipError) { + return { used: false, reason: err.reason }; + } + throw err; + } + db.close(); + return { + used: true, + changedFiles: changed.length, + deletedFiles: deleted.length, + insertedChunks + }; +} diff --git a/src/storage/sqlite/build/manifest.js b/src/storage/sqlite/build/manifest.js new file mode 100644 index 000000000..40bc86c5f --- /dev/null +++ b/src/storage/sqlite/build/manifest.js @@ -0,0 +1,74 @@ +import { normalizeFilePath } from '../utils.js'; + +export function getFileManifest(db, mode) { + const rows = db.prepare('SELECT file, hash, mtimeMs, size FROM file_manifest WHERE mode = ?') + .all(mode); + const map = new Map(); + for (const row of rows) { + map.set(normalizeFilePath(row.file), row); + } + return map; +} + +export function isManifestMatch(entry, dbEntry) { + if (!dbEntry) return false; + if (entry?.hash && dbEntry.hash) return entry.hash === dbEntry.hash; + const mtimeMatch = Number.isFinite(entry?.mtimeMs) && Number.isFinite(dbEntry?.mtimeMs) + ? entry.mtimeMs === dbEntry.mtimeMs + : false; + const sizeMatch = Number.isFinite(entry?.size) && Number.isFinite(dbEntry?.size) + ? entry.size === dbEntry.size + : false; + return mtimeMatch && sizeMatch; +} + +export function normalizeManifestFiles(manifestFiles) { + const entries = []; + const map = new Map(); + const conflicts = []; + for (const [file, entry] of Object.entries(manifestFiles || {})) { + const normalized = normalizeFilePath(file); + const record = { file, normalized, entry }; + const existing = map.get(normalized); + if (!existing) { + map.set(normalized, record); + continue; + } + if (isManifestMatch(entry, existing.entry)) { + if (!existing.entry?.hash && entry?.hash) { + map.set(normalized, record); + } + continue; + } + const score = (candidate) => (candidate?.hash ? 3 : 0) + + (Number.isFinite(candidate?.mtimeMs) ? 1 : 0) + + (Number.isFinite(candidate?.size) ? 
1 : 0); + if (score(entry) > score(existing.entry)) { + map.set(normalized, record); + } + conflicts.push(normalized); + } + entries.push(...map.values()); + return { entries, map, conflicts }; +} + +export function diffFileManifests(manifestEntries, dbFiles) { + const changed = []; + const deleted = []; + const manifestSet = new Set(); + + for (const record of manifestEntries || []) { + if (!record?.normalized) continue; + manifestSet.add(record.normalized); + const dbEntry = dbFiles.get(record.normalized); + if (!isManifestMatch(record.entry, dbEntry)) { + changed.push(record); + } + } + + for (const [file] of dbFiles.entries()) { + if (!manifestSet.has(file)) deleted.push(file); + } + + return { changed, deleted }; +} diff --git a/src/storage/sqlite/build/pragmas.js b/src/storage/sqlite/build/pragmas.js new file mode 100644 index 000000000..0dbeea388 --- /dev/null +++ b/src/storage/sqlite/build/pragmas.js @@ -0,0 +1,12 @@ +export const applyBuildPragmas = (db) => { + try { db.pragma('journal_mode = WAL'); } catch {} + try { db.pragma('synchronous = OFF'); } catch {} + try { db.pragma('temp_store = MEMORY'); } catch {} + try { db.pragma('cache_size = -200000'); } catch {} + try { db.pragma('mmap_size = 268435456'); } catch {} +}; + +export const restoreBuildPragmas = (db) => { + try { db.pragma('synchronous = NORMAL'); } catch {} + try { db.pragma('temp_store = DEFAULT'); } catch {} +}; diff --git a/src/storage/sqlite/build/statements.js b/src/storage/sqlite/build/statements.js new file mode 100644 index 000000000..b0736b503 --- /dev/null +++ b/src/storage/sqlite/build/statements.js @@ -0,0 +1,74 @@ +export const createInsertStatements = (db) => { + const insertChunk = db.prepare(` + INSERT OR REPLACE INTO chunks ( + id, chunk_id, mode, file, start, end, startLine, endLine, ext, kind, name, + headline, preContext, postContext, weight, tokens, ngrams, codeRelations, + docmeta, stats, complexity, lint, externalDocs, last_modified, last_author, + churn, chunk_authors + ) VALUES ( + @id, @chunk_id, @mode, @file, @start, @end, @startLine, @endLine, @ext, @kind, + @name, @headline, @preContext, @postContext, @weight, @tokens, @ngrams, + @codeRelations, @docmeta, @stats, @complexity, @lint, @externalDocs, + @last_modified, @last_author, @churn, @chunk_authors + ); + `); + + const insertFts = db.prepare(` + INSERT OR REPLACE INTO chunks_fts (rowid, mode, file, name, signature, kind, headline, doc, tokens) + VALUES (@id, @mode, @file, @name, @signature, @kind, @headline, @doc, @tokensText); + `); + + const insertTokenVocab = db.prepare( + 'INSERT OR REPLACE INTO token_vocab (mode, token_id, token) VALUES (?, ?, ?)' + ); + const insertTokenPosting = db.prepare( + 'INSERT OR REPLACE INTO token_postings (mode, token_id, doc_id, tf) VALUES (?, ?, ?, ?)' + ); + const insertDocLength = db.prepare( + 'INSERT OR REPLACE INTO doc_lengths (mode, doc_id, len) VALUES (?, ?, ?)' + ); + const insertTokenStats = db.prepare( + 'INSERT OR REPLACE INTO token_stats (mode, avg_doc_len, total_docs) VALUES (?, ?, ?)' + ); + const insertPhraseVocab = db.prepare( + 'INSERT OR REPLACE INTO phrase_vocab (mode, phrase_id, ngram) VALUES (?, ?, ?)' + ); + const insertPhrasePosting = db.prepare( + 'INSERT OR REPLACE INTO phrase_postings (mode, phrase_id, doc_id) VALUES (?, ?, ?)' + ); + const insertChargramVocab = db.prepare( + 'INSERT OR REPLACE INTO chargram_vocab (mode, gram_id, gram) VALUES (?, ?, ?)' + ); + const insertChargramPosting = db.prepare( + 'INSERT OR REPLACE INTO chargram_postings (mode, gram_id, doc_id) 
VALUES (?, ?, ?)' + ); + const insertMinhash = db.prepare( + 'INSERT OR REPLACE INTO minhash_signatures (mode, doc_id, sig) VALUES (?, ?, ?)' + ); + const insertDense = db.prepare( + 'INSERT OR REPLACE INTO dense_vectors (mode, doc_id, vector) VALUES (?, ?, ?)' + ); + const insertDenseMeta = db.prepare( + 'INSERT OR REPLACE INTO dense_meta (mode, dims, scale, model) VALUES (?, ?, ?, ?)' + ); + const insertFileManifest = db.prepare( + 'INSERT OR REPLACE INTO file_manifest (mode, file, hash, mtimeMs, size, chunk_count) VALUES (?, ?, ?, ?, ?, ?)' + ); + + return { + insertChunk, + insertFts, + insertTokenVocab, + insertTokenPosting, + insertDocLength, + insertTokenStats, + insertPhraseVocab, + insertPhrasePosting, + insertChargramVocab, + insertChargramPosting, + insertMinhash, + insertDense, + insertDenseMeta, + insertFileManifest + }; +}; diff --git a/src/storage/sqlite/build/validate.js b/src/storage/sqlite/build/validate.js new file mode 100644 index 000000000..829d31585 --- /dev/null +++ b/src/storage/sqlite/build/validate.js @@ -0,0 +1,83 @@ +import { REQUIRED_TABLES } from '../schema.js'; +import { hasRequiredTables } from '../utils.js'; + +export function getSchemaVersion(db) { + try { + const value = db.pragma('user_version', { simple: true }); + return Number.isFinite(value) ? value : null; + } catch { + return null; + } +} + +export function validateSqliteDatabase(db, mode, options = {}) { + const validateMode = options.validateMode || 'off'; + if (validateMode === 'off') return; + + const errors = []; + if (!hasRequiredTables(db, REQUIRED_TABLES)) { + errors.push('missing required tables'); + } + + const pragmaName = validateMode === 'full' ? 'integrity_check' : 'quick_check'; + try { + const rows = db.prepare(`PRAGMA ${pragmaName}`).all(); + const messages = []; + for (const row of rows) { + for (const value of Object.values(row)) { + if (value !== 'ok') messages.push(value); + } + } + if (messages.length) { + errors.push(`${pragmaName} failed: ${messages.join('; ')}`); + } + } catch (err) { + errors.push(`${pragmaName} failed: ${err?.message || err}`); + } + + const expected = options.expected || {}; + const expectedChunks = Number.isFinite(expected.chunks) ? expected.chunks : null; + if (expectedChunks !== null) { + const chunkCount = db.prepare('SELECT COUNT(*) AS total FROM chunks WHERE mode = ?') + .get(mode)?.total ?? 0; + if (chunkCount !== expectedChunks) { + errors.push(`chunks=${chunkCount} expected=${expectedChunks}`); + } + const ftsCount = db.prepare('SELECT COUNT(*) AS total FROM chunks_fts WHERE mode = ?') + .get(mode)?.total ?? 0; + if (ftsCount !== expectedChunks) { + errors.push(`chunks_fts=${ftsCount} expected=${expectedChunks}`); + } + const lengthCount = db.prepare('SELECT COUNT(*) AS total FROM doc_lengths WHERE mode = ?') + .get(mode)?.total ?? 0; + if (lengthCount !== expectedChunks) { + errors.push(`doc_lengths=${lengthCount} expected=${expectedChunks}`); + } + } + + const expectedDense = Number.isFinite(expected.dense) ? expected.dense : null; + if (expectedDense !== null) { + const denseCount = db.prepare('SELECT COUNT(*) AS total FROM dense_vectors WHERE mode = ?') + .get(mode)?.total ?? 0; + if (denseCount !== expectedDense) { + errors.push(`dense_vectors=${denseCount} expected=${expectedDense}`); + } + } + + const expectedMinhash = Number.isFinite(expected.minhash) ? expected.minhash : null; + if (expectedMinhash !== null) { + const minhashCount = db.prepare( + 'SELECT COUNT(*) AS total FROM minhash_signatures WHERE mode = ?' + ).get(mode)?.total ?? 
0; + if (minhashCount !== expectedMinhash) { + errors.push(`minhash_signatures=${minhashCount} expected=${expectedMinhash}`); + } + } + + if (errors.length) { + throw new Error(`[sqlite] Validation (${validateMode}) failed for ${mode}: ${errors.join(', ')}`); + } + if (options.emitOutput) { + console.log(`[sqlite] Validation (${validateMode}) ok for ${mode}.`); + } +} diff --git a/src/storage/sqlite/build/vocab.js b/src/storage/sqlite/build/vocab.js new file mode 100644 index 000000000..1fd8e585c --- /dev/null +++ b/src/storage/sqlite/build/vocab.js @@ -0,0 +1,76 @@ +import { chunkArray } from '../utils.js'; + +export function getVocabCount(db, mode, table) { + const row = db.prepare(`SELECT COUNT(*) AS total FROM ${table} WHERE mode = ?`) + .get(mode) || {}; + return Number.isFinite(row.total) ? row.total : 0; +} + +export function fetchVocabRows(db, mode, table, idColumn, valueColumn, values) { + const unique = Array.from(new Set(values.filter(Boolean))); + if (!unique.length) return []; + const rows = []; + for (const chunk of chunkArray(unique)) { + const placeholders = chunk.map(() => '?').join(','); + const stmt = db.prepare( + `SELECT ${idColumn} AS id, ${valueColumn} AS value FROM ${table} ` + + `WHERE mode = ? AND ${valueColumn} IN (${placeholders})` + ); + rows.push(...stmt.all(mode, ...chunk)); + } + return rows; +} + +export function ensureVocabIds( + db, + mode, + table, + idColumn, + valueColumn, + values, + insertStmt, + options = {} +) { + const unique = Array.from(new Set(values.filter(Boolean))); + const totalBefore = getVocabCount(db, mode, table); + if (!unique.length) { + return { map: new Map(), inserted: 0, total: totalBefore, skip: false }; + } + const existing = fetchVocabRows(db, mode, table, idColumn, valueColumn, unique); + const map = new Map(existing.map((row) => [row.value, row.id])); + const missing = unique.filter((value) => !map.has(value)); + if (!missing.length) { + return { map, inserted: 0, total: totalBefore, skip: false }; + } + + const limits = options?.limits || null; + if (limits && totalBefore > 0) { + const ratio = missing.length / totalBefore; + const ratioLimit = Number.isFinite(limits.ratio) ? limits.ratio : null; + const absLimit = Number.isFinite(limits.absolute) ? limits.absolute : null; + if ((ratioLimit !== null && ratio > ratioLimit) || (absLimit !== null && missing.length > absLimit)) { + return { + map, + inserted: 0, + total: totalBefore, + skip: true, + reason: `${table} growth ${missing.length}/${totalBefore}` + }; + } + } + + missing.sort(); + const maxRow = db.prepare(`SELECT MAX(${idColumn}) AS maxId FROM ${table} WHERE mode = ?`) + .get(mode); + let nextId = Number.isFinite(maxRow?.maxId) ? 
maxRow.maxId + 1 : 0; + const insertTx = db.transaction(() => { + for (const value of missing) { + insertStmt.run(mode, nextId, value); + map.set(value, nextId); + nextId += 1; + } + }); + insertTx(); + + return { map, inserted: missing.length, total: totalBefore + missing.length, skip: false }; +} diff --git a/src/sqlite/incremental.js b/src/storage/sqlite/incremental.js similarity index 100% rename from src/sqlite/incremental.js rename to src/storage/sqlite/incremental.js diff --git a/src/sqlite/schema.js b/src/storage/sqlite/schema.js similarity index 94% rename from src/sqlite/schema.js rename to src/storage/sqlite/schema.js index e2efc89f5..57e19b395 100644 --- a/src/sqlite/schema.js +++ b/src/storage/sqlite/schema.js @@ -1,4 +1,4 @@ -export const SCHEMA_VERSION = 5; +export const SCHEMA_VERSION = 7; export const REQUIRED_TABLES = [ 'chunks', @@ -17,7 +17,7 @@ export const REQUIRED_TABLES = [ 'file_manifest' ]; -export const CREATE_TABLES_SQL = ` +export const CREATE_TABLES_BASE_SQL = ` DROP TABLE IF EXISTS chunks_fts; DROP TABLE IF EXISTS chunks; DROP TABLE IF EXISTS token_postings; @@ -35,6 +35,7 @@ export const CREATE_TABLES_SQL = ` CREATE TABLE chunks ( id INTEGER PRIMARY KEY, + chunk_id TEXT, mode TEXT, file TEXT, start INTEGER, @@ -61,13 +62,14 @@ export const CREATE_TABLES_SQL = ` churn REAL, chunk_authors TEXT ); - CREATE INDEX idx_chunks_file ON chunks (mode, file); CREATE VIRTUAL TABLE chunks_fts USING fts5( mode UNINDEXED, file, name, + signature, kind, headline, + doc, tokens, tokenize = 'unicode61' ); @@ -85,7 +87,6 @@ export const CREATE_TABLES_SQL = ` tf INTEGER NOT NULL, PRIMARY KEY (mode, token_id, doc_id) ); - CREATE INDEX idx_token_postings_token ON token_postings (mode, token_id); CREATE TABLE doc_lengths ( mode TEXT NOT NULL, doc_id INTEGER NOT NULL, @@ -110,7 +111,6 @@ export const CREATE_TABLES_SQL = ` doc_id INTEGER NOT NULL, PRIMARY KEY (mode, phrase_id, doc_id) ); - CREATE INDEX idx_phrase_postings_phrase ON phrase_postings (mode, phrase_id); CREATE TABLE chargram_vocab ( mode TEXT NOT NULL, gram_id INTEGER NOT NULL, @@ -124,7 +124,6 @@ export const CREATE_TABLES_SQL = ` doc_id INTEGER NOT NULL, PRIMARY KEY (mode, gram_id, doc_id) ); - CREATE INDEX idx_chargram_postings_gram ON chargram_postings (mode, gram_id); CREATE TABLE minhash_signatures ( mode TEXT NOT NULL, doc_id INTEGER NOT NULL, @@ -152,5 +151,17 @@ export const CREATE_TABLES_SQL = ` chunk_count INTEGER, PRIMARY KEY (mode, file) ); +`; + +export const CREATE_INDEXES_SQL = ` + CREATE INDEX idx_chunks_file ON chunks (mode, file); + CREATE INDEX idx_token_postings_token ON token_postings (mode, token_id); + CREATE INDEX idx_phrase_postings_phrase ON phrase_postings (mode, phrase_id); + CREATE INDEX idx_chargram_postings_gram ON chargram_postings (mode, gram_id); CREATE INDEX idx_file_manifest_mode_file ON file_manifest (mode, file); `; + +export const CREATE_TABLES_SQL = ` +${CREATE_TABLES_BASE_SQL} +${CREATE_INDEXES_SQL} +`; diff --git a/src/storage/sqlite/utils.js b/src/storage/sqlite/utils.js new file mode 100644 index 000000000..35952e5a5 --- /dev/null +++ b/src/storage/sqlite/utils.js @@ -0,0 +1,184 @@ +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { + MAX_JSON_BYTES, + loadChunkMeta, + loadTokenPostings, + readJsonFile +} from '../../shared/artifact-io.js'; + +/** + * Split an array into fixed-size chunks. 
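+ * The default size of 900 presumably keeps `WHERE ... IN (...)` parameter
+ * lists under SQLite's classic 999 bound-variable limit. For example,
+ * chunkArray(['a', 'b', 'c'], 2) returns [['a', 'b'], ['c']].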
+ * @param {Array} items + * @param {number} [size] + * @returns {Array>} + */ +export function chunkArray(items, size = 900) { + const chunks = []; + for (let i = 0; i < items.length; i += size) { + chunks.push(items.slice(i, i + size)); + } + return chunks; +} + +/** + * Return the set of table names in a SQLite database. + * @param {import('better-sqlite3').Database} db + * @returns {Set} + */ +export function getTableNames(db) { + const rows = db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all(); + return new Set(rows.map((row) => row.name)); +} + +/** + * Check that all required tables exist. + * @param {import('better-sqlite3').Database} db + * @param {string[]} requiredTables + * @returns {boolean} + */ +export function hasRequiredTables(db, requiredTables) { + const tableNames = getTableNames(db); + return requiredTables.every((name) => tableNames.has(name)); +} + +/** + * Normalize a file path to POSIX separators. + * @param {string} value + * @returns {string} + */ +export function normalizeFilePath(value) { + if (typeof value !== 'string') return value; + return value.replace(/\\/g, '/'); +} + +/** + * Read and parse JSON from disk. + * @param {string} filePath + * @returns {any} + */ +export function readJson(filePath) { + return readJsonFile(filePath, { maxBytes: MAX_JSON_BYTES }); +} + +/** + * Read JSON from disk if it exists; otherwise return null. + * @param {string} dir + * @param {string} name + * @returns {any|null} + */ +export function loadOptional(dir, name) { + const target = path.join(dir, name); + const hasTarget = fs.existsSync(target) || fs.existsSync(`${target}.bak`); + const hasGz = name.endsWith('.json') + && (fs.existsSync(`${target}.gz`) || fs.existsSync(`${target}.gz.bak`)); + if (!hasTarget && !hasGz) { + return null; + } + try { + return readJson(target); + } catch (err) { + if (err?.code === 'ERR_JSON_TOO_LARGE') { + console.warn(`[sqlite] Skipping ${name}: ${err.message}`); + return null; + } + throw err; + } +} + +/** + * Load file-backed index artifacts from a directory. + * @param {string} dir + * @param {string} modelId + * @returns {object|null} + */ +export function loadIndex(dir, modelId) { + const chunkMetaPath = path.join(dir, 'chunk_meta.json'); + const chunkMetaJsonlPath = path.join(dir, 'chunk_meta.jsonl'); + const chunkMetaMetaPath = path.join(dir, 'chunk_meta.meta.json'); + if (!fs.existsSync(chunkMetaPath) + && !fs.existsSync(chunkMetaJsonlPath) + && !fs.existsSync(chunkMetaMetaPath)) { + return null; + } + const chunkMeta = loadChunkMeta(dir, { maxBytes: MAX_JSON_BYTES }); + const denseVec = loadOptional(dir, 'dense_vectors_uint8.json'); + if (denseVec && !denseVec.model) denseVec.model = modelId || null; + return { + chunkMeta, + fileMeta: loadOptional(dir, 'file_meta.json'), + denseVec, + phraseNgrams: loadOptional(dir, 'phrase_ngrams.json'), + chargrams: loadOptional(dir, 'chargram_postings.json'), + minhash: loadOptional(dir, 'minhash_signatures.json'), + tokenPostings: (() => { + const direct = loadOptional(dir, 'token_postings.json'); + if (direct) return direct; + try { + return loadTokenPostings(dir, { maxBytes: MAX_JSON_BYTES }); + } catch { + return null; + } + })() + }; +} + +const SQLITE_SIDECARS = ['-wal', '-shm']; + +async function removeSqliteSidecars(basePath) { + await Promise.all(SQLITE_SIDECARS.map(async (suffix) => { + try { + await fsPromises.rm(`${basePath}${suffix}`, { force: true }); + } catch {} + })); +} + +/** + * Atomically replace a sqlite database, cleaning up WAL/SHM sidecars. 
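+ * Strategy, as implemented below: rename the live db aside to `backupPath`,
+ * rename the temp db into place, and if that rename fails with
+ * EEXIST/EPERM/ENOTEMPTY (typical Windows rename semantics), force-remove
+ * the target and retry once. The backup is deleted afterwards unless
+ * `options.keepBackup` is true.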
+ * @param {string} tempDbPath + * @param {string} finalDbPath + * @param {{keepBackup?:boolean,backupPath?:string}} [options] + */ +export async function replaceSqliteDatabase(tempDbPath, finalDbPath, options = {}) { + const keepBackup = options.keepBackup === true; + const backupPath = options.backupPath || `${finalDbPath}.bak`; + const finalExists = fs.existsSync(finalDbPath); + + await removeSqliteSidecars(finalDbPath); + await removeSqliteSidecars(tempDbPath); + + let backupAvailable = fs.existsSync(backupPath); + if (finalExists && !backupAvailable) { + try { + await fsPromises.rename(finalDbPath, backupPath); + backupAvailable = true; + } catch (err) { + if (err?.code !== 'ENOENT') { + backupAvailable = fs.existsSync(backupPath); + } + } + } + + try { + await fsPromises.rename(tempDbPath, finalDbPath); + } catch (err) { + if (err?.code !== 'EEXIST' && err?.code !== 'EPERM' && err?.code !== 'ENOTEMPTY') { + throw err; + } + if (!backupAvailable) { + throw err; + } + try { + await fsPromises.rm(finalDbPath, { force: true }); + } catch {} + await fsPromises.rename(tempDbPath, finalDbPath); + } + + if (!keepBackup) { + try { + await fsPromises.rm(backupPath, { force: true }); + } catch {} + } + await removeSqliteSidecars(backupPath); +} diff --git a/src/sqlite/vector.js b/src/storage/sqlite/vector.js similarity index 100% rename from src/sqlite/vector.js rename to src/storage/sqlite/vector.js diff --git a/sublime/PairOfCleats/Default.sublime-commands b/sublime/PairOfCleats/Default.sublime-commands new file mode 100644 index 000000000..673771531 --- /dev/null +++ b/sublime/PairOfCleats/Default.sublime-commands @@ -0,0 +1,94 @@ +[ + { + "caption": "PairOfCleats: Open Settings", + "command": "pair_of_cleats_open_settings" + }, + { + "caption": "PairOfCleats: Validate Settings", + "command": "pair_of_cleats_validate_settings" + }, + { + "caption": "PairOfCleats: Search", + "command": "pair_of_cleats_search" + }, + { + "caption": "PairOfCleats: Search (With Options)", + "command": "pair_of_cleats_search_with_options" + }, + { + "caption": "PairOfCleats: Search Selection", + "command": "pair_of_cleats_search_selection" + }, + { + "caption": "PairOfCleats: Search Symbol Under Cursor", + "command": "pair_of_cleats_search_symbol_under_cursor" + }, + { + "caption": "PairOfCleats: Search History", + "command": "pair_of_cleats_search_history" + }, + { + "caption": "PairOfCleats: Repeat Last Search", + "command": "pair_of_cleats_repeat_last_search" + }, + { + "caption": "PairOfCleats: Explain Search", + "command": "pair_of_cleats_explain_search" + }, + { + "caption": "PairOfCleats: Index Build (Code)", + "command": "pair_of_cleats_index_build_code" + }, + { + "caption": "PairOfCleats: Index Build (Prose)", + "command": "pair_of_cleats_index_build_prose" + }, + { + "caption": "PairOfCleats: Index Build (All)", + "command": "pair_of_cleats_index_build_all" + }, + { + "caption": "PairOfCleats: Index Watch Start", + "command": "pair_of_cleats_index_watch_start" + }, + { + "caption": "PairOfCleats: Index Watch Stop", + "command": "pair_of_cleats_index_watch_stop" + }, + { + "caption": "PairOfCleats: Index Validate", + "command": "pair_of_cleats_index_validate" + }, + { + "caption": "PairOfCleats: Open Index Directory", + "command": "pair_of_cleats_open_index_directory" + }, + { + "caption": "PairOfCleats: Map (Repo)", + "command": "pair_of_cleats_map_repo" + }, + { + "caption": "PairOfCleats: Map (Current Folder)", + "command": "pair_of_cleats_map_current_folder" + }, + { + "caption": "PairOfCleats: Map 
(Current File)", + "command": "pair_of_cleats_map_current_file" + }, + { + "caption": "PairOfCleats: Map (Symbol Under Cursor)", + "command": "pair_of_cleats_map_symbol_under_cursor" + }, + { + "caption": "PairOfCleats: Map (Selection)", + "command": "pair_of_cleats_map_selection" + }, + { + "caption": "PairOfCleats: Map Jump to Node", + "command": "pair_of_cleats_map_jump_to_node" + }, + { + "caption": "PairOfCleats: Map Open Last Viewer", + "command": "pair_of_cleats_map_open_last_viewer" + } +] diff --git a/sublime/PairOfCleats/Default.sublime-keymap b/sublime/PairOfCleats/Default.sublime-keymap new file mode 100644 index 000000000..fe51488c7 --- /dev/null +++ b/sublime/PairOfCleats/Default.sublime-keymap @@ -0,0 +1 @@ +[] diff --git a/sublime/PairOfCleats/Main.sublime-menu b/sublime/PairOfCleats/Main.sublime-menu new file mode 100644 index 000000000..db1842c2d --- /dev/null +++ b/sublime/PairOfCleats/Main.sublime-menu @@ -0,0 +1,15 @@ +[ + { + "id": "preferences", + "children": [ + { + "caption": "PairOfCleats Settings", + "command": "pair_of_cleats_open_settings" + }, + { + "caption": "PairOfCleats Validate Settings", + "command": "pair_of_cleats_validate_settings" + } + ] + } +] diff --git a/sublime/PairOfCleats/PairOfCleats.sublime-settings b/sublime/PairOfCleats/PairOfCleats.sublime-settings new file mode 100644 index 000000000..0e15ef13f --- /dev/null +++ b/sublime/PairOfCleats/PairOfCleats.sublime-settings @@ -0,0 +1,41 @@ +{ + "pairofcleats_path": "", + "node_path": "", + "index_mode_default": "both", + "search_backend_default": "", + "open_results_in": "quick_panel", + "search_limit": 25, + "results_buffer_threshold": 50, + "history_limit": 25, + "search_prompt_options": false, + "index_watch_scope": "repo", + "index_watch_folder": "", + "index_watch_mode": "all", + "index_watch_poll_ms": 2000, + "index_watch_debounce_ms": 500, + "map_type_default": "combined", + "map_format_default": "html-iso", + "map_prompt_options": false, + "map_output_dir": ".pairofcleats/maps", + "map_only_exported": false, + "map_collapse_default": "none", + "map_max_files": 200, + "map_max_members_per_file": 60, + "map_max_edges": 3000, + "map_top_k_by_degree": false, + "map_show_report_panel": null, + "map_stream_output": false, + "map_open_uri_template": "subl://open?file={file}&line={line}&column={column}", + "map_three_url": "", + "map_index_mode": "code", + "map_wasd_sensitivity": 16000, + "map_wasd_acceleration": 6000, + "map_wasd_max_speed": 24000, + "map_wasd_drag": 6, + "map_zoom_sensitivity": 0.1, + "profile": "", + "cache_root": "", + "embeddings_mode": "", + "node_options": "", + "env": {} +} diff --git a/sublime/PairOfCleats/README.md b/sublime/PairOfCleats/README.md new file mode 100644 index 000000000..76e994c5a --- /dev/null +++ b/sublime/PairOfCleats/README.md @@ -0,0 +1,115 @@ +# PairOfCleats Sublime Text + +PairOfCleats integration for Sublime Text 3. + +## Install + +- Copy or symlink `sublime/PairOfCleats` into your Sublime `Packages` directory. +- Ensure Node.js 18+ is available on PATH (or set `node_path`). +- Install the PairOfCleats CLI (global npm install or local repo checkout). + +## Package Control notes + +This package avoids external Python dependencies. It relies on the Node runtime +and the PairOfCleats CLI or local repo binaries. 
+ +## CLI discovery + +Resolution order: +1) `pairofcleats_path` setting (absolute or repo-relative) +2) `node_modules/.bin/pairofcleats` (repo-local) +3) `bin/pairofcleats.js` (repo-local) +4) `pairofcleats` on PATH + +If the selected path ends in `.js`, the plugin runs it with `node_path` (or `node`). + +## Settings + +Open the command palette and run `PairOfCleats: Open Settings` or `PairOfCleats: Validate Settings`. + +- `pairofcleats_path`: Path to the CLI binary or `bin/pairofcleats.js`. +- `node_path`: Optional override for the Node.js binary. +- `index_mode_default`: `code`, `prose`, or `both`. +- `search_backend_default`: `memory`, `sqlite`, `sqlite-fts`, or `lmdb`. +- `open_results_in`: `quick_panel`, `new_tab`, or `output_panel`. +- `search_limit`: Default `--top` value. +- `results_buffer_threshold`: When using `quick_panel`, switch to the output panel once results reach this count (0 disables). +- `history_limit`: Maximum queries stored per project. +- `search_prompt_options`: Prompt for mode/backend/limit each search. +- `index_watch_scope`: `repo` or `folder` for watch root selection. +- `index_watch_folder`: Optional folder path (absolute or repo-relative) when using `folder` scope. +- `index_watch_mode`: `all`, `code`, `prose`, `records`, or `extracted-prose`. +- `index_watch_poll_ms`: Watch polling interval in ms (when polling is enabled). +- `index_watch_debounce_ms`: Debounce interval for watch rebuilds (ms). +- `map_type_default`: `combined`, `imports`, `calls`, `usages`, or `dataflow`. +- `map_format_default`: `html-iso`, `html`, `svg`, `dot`, or `json`. +- `map_prompt_options`: Prompt for map type/format each run. +- `map_output_dir`: Output directory for map artifacts (absolute or repo-relative). +- `map_only_exported`: When true, include exported symbols only. +- `map_collapse_default`: `none`, `file`, or `dir`. +- `map_max_files`: Guardrail for file nodes. +- `map_max_members_per_file`: Guardrail for members per file. +- `map_max_edges`: Guardrail for edges. +- `map_top_k_by_degree`: Prefer top-k files by edge degree when truncating. +- `map_show_report_panel`: Set to true to show warnings/summary in an output panel. +- `map_stream_output`: Stream CLI output to the map panel. +- `map_open_uri_template`: URI template for the isometric viewer (Sublime links). +- `map_three_url`: Override three.js module path (default resolves from node_modules). +- `map_index_mode`: Index mode to read (`code` or `prose`). +- `map_wasd_sensitivity`: Isometric viewer WASD sensitivity. +- `map_wasd_acceleration`: Isometric viewer WASD acceleration. +- `map_wasd_max_speed`: Isometric viewer WASD max speed. +- `map_wasd_drag`: Isometric viewer WASD damping. +- `map_zoom_sensitivity`: Isometric viewer zoom sensitivity. +- `profile`: Sets `PAIROFCLEATS_PROFILE`. +- `cache_root`: Sets `PAIROFCLEATS_CACHE_ROOT`. +- `embeddings_mode`: Sets `PAIROFCLEATS_EMBEDDINGS`. +- `node_options`: Sets `PAIROFCLEATS_NODE_OPTIONS`. +- `env`: Extra environment overrides (merged with defaults). 
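+
+For reference, a minimal user override might look like this sketch (values are
+illustrative; any key left out falls back to the defaults listed above):
+
+```json
+{
+    "pairofcleats_path": "./bin/pairofcleats.js",
+    "index_mode_default": "code",
+    "search_backend_default": "sqlite",
+    "search_limit": 50,
+    "open_results_in": "output_panel"
+}
+```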
+ +## Commands + +- `PairOfCleats: Search` +- `PairOfCleats: Search (With Options)` +- `PairOfCleats: Search Selection` +- `PairOfCleats: Search Symbol Under Cursor` +- `PairOfCleats: Search History` +- `PairOfCleats: Repeat Last Search` +- `PairOfCleats: Explain Search` +- `PairOfCleats: Index Build (Code)` +- `PairOfCleats: Index Build (Prose)` +- `PairOfCleats: Index Build (All)` +- `PairOfCleats: Index Watch Start` +- `PairOfCleats: Index Watch Stop` +- `PairOfCleats: Index Validate` +- `PairOfCleats: Open Index Directory` +- `PairOfCleats: Map (Repo)` +- `PairOfCleats: Map (Current Folder)` +- `PairOfCleats: Map (Current File)` +- `PairOfCleats: Map (Symbol Under Cursor)` +- `PairOfCleats: Map (Selection)` +- `PairOfCleats: Map Jump to Node` +- `PairOfCleats: Map Open Last Viewer` + +## Project overrides + +In your `.sublime-project` file: + +```json +{ + "settings": { + "pairofcleats": { + "pairofcleats_path": "./bin/pairofcleats.js", + "env": { + "PAIROFCLEATS_PROFILE": "balanced" + } + } + } +} +``` + +## CLI output contract + +The Sublime integration is designed to use `--json` output so it can access full +metadata when available. It does not assume the compact JSON contract used by +other editors. diff --git a/sublime/PairOfCleats/__init__.py b/sublime/PairOfCleats/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sublime/PairOfCleats/commands/__init__.py b/sublime/PairOfCleats/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sublime/PairOfCleats/commands/index.py b/sublime/PairOfCleats/commands/index.py new file mode 100644 index 000000000..079a44b06 --- /dev/null +++ b/sublime/PairOfCleats/commands/index.py @@ -0,0 +1,373 @@ +import sublime +import sublime_plugin + +from ..lib import config +from ..lib import index_state +from ..lib import indexing +from ..lib import paths +from ..lib import runner +from ..lib import ui +from ..lib import watch + +INDEX_PANEL = 'pairofcleats-index' + + +def _resolve_repo_root(window): + return paths.resolve_repo_root(window, return_reason=True) + + +def _has_repo_root(window): + return paths.has_repo_root(window) + + +def _run_index_build(window, mode): + settings = config.get_settings(window) + repo_root, reason = _resolve_repo_root(window) + if not repo_root: + ui.show_error('PairOfCleats: {0}'.format(reason)) + return + if reason: + ui.show_status('PairOfCleats: {0}'.format(reason)) + + errors = config.validate_settings(settings, repo_root) + if errors: + ui.show_error('PairOfCleats settings need attention:\n- {0}'.format('\n- '.join(errors))) + return + + args = indexing.build_index_args(mode, repo_root=repo_root) + cli = paths.resolve_cli(settings, repo_root) + command = cli['command'] + full_args = list(cli.get('args_prefix') or []) + args + env = config.build_env(settings) + + ui.show_status('PairOfCleats: index build started ({0}).'.format(mode)) + + def on_done(result): + if result.returncode == 0: + index_state.record_last_build(window, mode) + ui.show_status('PairOfCleats: index build complete ({0}).'.format(mode)) + return + message = result.output.strip() or 'PairOfCleats index build failed.' 
+ ui.show_error(message) + + runner.run_process( + command, + full_args, + cwd=repo_root, + env=env, + window=window, + title='PairOfCleats index build', + capture_json=False, + on_done=on_done, + stream_output=True, + panel_name=INDEX_PANEL + ) + + +def _run_index_watch(window): + settings = config.get_settings(window) + repo_root, reason = _resolve_repo_root(window) + if not repo_root: + ui.show_error('PairOfCleats: {0}'.format(reason)) + return + if reason: + ui.show_status('PairOfCleats: {0}'.format(reason)) + + errors = config.validate_settings(settings, repo_root) + if errors: + ui.show_error('PairOfCleats settings need attention:\n- {0}'.format('\n- '.join(errors))) + return + + if watch.is_running(window): + active_root = watch.current_root(window) + message = 'PairOfCleats: watch already running.' + if active_root: + message = '{0} ({1})'.format(message, active_root) + ui.show_status(message) + return + + watch_root = paths.resolve_watch_root(window, settings) + if not watch_root: + ui.show_error('PairOfCleats: unable to resolve watch root.') + return + + mode = settings.get('index_watch_mode') or 'all' + poll_ms = settings.get('index_watch_poll_ms') + debounce_ms = settings.get('index_watch_debounce_ms') + + args = indexing.build_index_args( + mode, + repo_root=watch_root, + watch=True, + watch_poll_ms=poll_ms, + watch_debounce_ms=debounce_ms + ) + + cli = paths.resolve_cli(settings, repo_root) + command = cli['command'] + full_args = list(cli.get('args_prefix') or []) + args + env = config.build_env(settings) + + ui.show_status('PairOfCleats: watch started ({0}).'.format(watch_root)) + + def on_done(result): + watch.clear_if_done(window) + if result.returncode == 0: + ui.show_status('PairOfCleats: watch stopped.') + return + message = result.output.strip() or 'PairOfCleats watch failed.' + ui.show_error(message) + + handle = runner.run_process( + command, + full_args, + cwd=watch_root, + env=env, + window=window, + title='PairOfCleats index watch', + capture_json=False, + on_done=on_done, + stream_output=True, + panel_name=INDEX_PANEL + ) + watch.register(window, handle, watch_root) + + +def _run_index_watch_stop(window): + if watch.stop(window): + ui.show_status('PairOfCleats: watch stopping...') + else: + ui.show_status('PairOfCleats: no watch to stop.') + + +def _run_index_validate(window): + settings = config.get_settings(window) + repo_root, reason = _resolve_repo_root(window) + if not repo_root: + ui.show_error('PairOfCleats: {0}'.format(reason)) + return + if reason: + ui.show_status('PairOfCleats: {0}'.format(reason)) + + errors = config.validate_settings(settings, repo_root) + if errors: + ui.show_error('PairOfCleats settings need attention:\n- {0}'.format('\n- '.join(errors))) + return + + args = indexing.build_validate_args(repo_root=repo_root, json_output=True) + cli = paths.resolve_cli(settings, repo_root) + command = cli['command'] + full_args = list(cli.get('args_prefix') or []) + args + env = config.build_env(settings) + + ui.show_status('PairOfCleats: validating index...') + + def on_done(result): + if result.error: + ui.show_error(result.error) + return + if result.returncode not in (0, 1): + message = result.output.strip() or 'PairOfCleats index validate failed.' 
+ ui.show_error(message) + return + payload = result.payload + if not isinstance(payload, dict): + ui.show_error('PairOfCleats index validate returned invalid JSON.') + return + text = _format_validate_report(payload) + ui.write_output_panel(window, 'pairofcleats-validate', text) + if payload.get('ok'): + ui.show_status('PairOfCleats: index validation ok.') + else: + ui.show_error('PairOfCleats: index validation found issues.') + + runner.run_process( + command, + full_args, + cwd=repo_root, + env=env, + window=window, + title='PairOfCleats index validate', + capture_json=True, + on_done=on_done, + stream_output=False + ) + + +def _format_validate_report(payload): + lines = ['PairOfCleats index validation', ''] + root = payload.get('root') or '' + if root: + lines.append('Repo: {0}'.format(root)) + lines.append('Status: {0}'.format('ok' if payload.get('ok') else 'issues')) + lines.append('') + + modes = payload.get('modes') or {} + if isinstance(modes, dict): + for mode, entry in modes.items(): + if not isinstance(entry, dict): + continue + status = 'ok' if entry.get('ok') else 'missing' + path = entry.get('path') or '' + lines.append('{0}: {1}'.format(mode, status)) + if path: + lines.append(' {0}'.format(path)) + missing = entry.get('missing') + if isinstance(missing, list) and missing: + lines.append(' missing: {0}'.format(', '.join(missing))) + warnings = entry.get('warnings') + if isinstance(warnings, list) and warnings: + lines.append(' warnings: {0}'.format(', '.join(warnings))) + lines.append('') + + issues = payload.get('issues') + if isinstance(issues, list) and issues: + lines.append('Issues:') + for issue in issues: + lines.append('- {0}'.format(issue)) + lines.append('') + + warnings = payload.get('warnings') + if isinstance(warnings, list) and warnings: + lines.append('Warnings:') + for warning in warnings: + lines.append('- {0}'.format(warning)) + lines.append('') + + hints = payload.get('hints') + if isinstance(hints, list) and hints: + lines.append('Hints:') + for hint in hints: + lines.append('- {0}'.format(hint)) + lines.append('') + + return '\n'.join(lines).rstrip() + '\n' + + +def _run_open_index_dir(window): + settings = config.get_settings(window) + repo_root, reason = _resolve_repo_root(window) + if not repo_root: + ui.show_error('PairOfCleats: {0}'.format(reason)) + return + if reason: + ui.show_status('PairOfCleats: {0}'.format(reason)) + + errors = config.validate_settings(settings, repo_root) + if errors: + ui.show_error('PairOfCleats settings need attention:\n- {0}'.format('\n- '.join(errors))) + return + + args = indexing.build_config_dump_args(repo_root=repo_root, json_output=True) + cli = paths.resolve_cli(settings, repo_root) + command = cli['command'] + full_args = list(cli.get('args_prefix') or []) + args + env = config.build_env(settings) + + def on_done(result): + if result.error: + ui.show_error(result.error) + return + if result.returncode != 0: + message = result.output.strip() or 'PairOfCleats config dump failed.' 
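+            # Without a successful config dump there is no derived.repoCacheRoot
+            # to open, so surface the raw CLI output instead.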
+ ui.show_error(message) + return + payload = result.payload + if not isinstance(payload, dict): + ui.show_error('PairOfCleats config dump returned invalid JSON.') + return + derived = payload.get('derived') or {} + repo_cache_root = derived.get('repoCacheRoot') + if not repo_cache_root: + ui.show_error('PairOfCleats: repo cache root unavailable.') + return + window.run_command('open_dir', {'dir': repo_cache_root}) + + runner.run_process( + command, + full_args, + cwd=repo_root, + env=env, + window=window, + title='PairOfCleats config dump', + capture_json=True, + on_done=on_done, + stream_output=False + ) + + +class PairOfCleatsIndexBuildCodeCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + _run_index_build(self.window, 'code') + + +class PairOfCleatsIndexBuildProseCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + _run_index_build(self.window, 'prose') + + +class PairOfCleatsIndexBuildAllCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + _run_index_build(self.window, 'all') + + +class PairOfCleatsIndexWatchStartCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + _run_index_watch(self.window) + + +class PairOfCleatsIndexWatchStopCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + _run_index_watch_stop(self.window) + + +class PairOfCleatsIndexValidateCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + _run_index_validate(self.window) + + +class PairOfCleatsOpenIndexDirectoryCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + _run_open_index_dir(self.window) diff --git a/sublime/PairOfCleats/commands/map.py b/sublime/PairOfCleats/commands/map.py new file mode 100644 index 000000000..385b075c3 --- /dev/null +++ b/sublime/PairOfCleats/commands/map.py @@ -0,0 +1,418 @@ +import json +import os +import webbrowser +from urllib.parse import quote + +import sublime +import sublime_plugin + +from ..lib import config +from ..lib import map as map_lib +from ..lib import map_state +from ..lib import paths +from ..lib import results +from ..lib import runner +from ..lib import ui + +MAP_TYPE_CHOICES = [ + ('combined', 'combined (imports + calls + usages + dataflow)'), + ('imports', 'imports only'), + ('calls', 'calls only'), + ('usages', 'usages only'), + ('dataflow', 'dataflow only') +] + +MAP_FORMAT_CHOICES = [ + ('html-iso', 'isometric HTML (three.js)'), + ('html', 'graphviz HTML'), + ('svg', 'graphviz SVG'), + ('dot', 'graphviz DOT'), + ('json', 'map model JSON') +] + + +def _resolve_repo_root(window, path_hint=None): + return paths.resolve_repo_root(window, return_reason=True, path_hint=path_hint) + + +def _has_repo_root(window, path_hint=None): + return paths.has_repo_root(window, path_hint=path_hint) + + +def _extract_selection(view): + if view is None: + return '' + for region in view.sel(): + if not region.empty(): + return view.substr(region) + return '' + + +def _extract_symbol(view): + if view is None: + return '' + selection = view.sel() + if not selection: + return '' + region = selection[0] + 
word = view.word(region) + return view.substr(word) + + +def _relative_focus(repo_root, path_value): + if not path_value: + return '' + if os.path.isabs(path_value): + try: + rel = os.path.relpath(path_value, repo_root) + return rel.replace('\\', '/') + except Exception: + return path_value.replace('\\', '/') + return path_value.replace('\\', '/') + + +def _open_in_browser(path_value): + if not path_value: + return + try: + resolved = os.path.abspath(path_value) + url = 'file:///{0}'.format(quote(resolved.replace('\\', '/'))) + except Exception: + url = 'file:///{0}'.format(path_value.replace('\\', '/')) + try: + webbrowser.open_new_tab(url) + except Exception: + ui.show_error('PairOfCleats: failed to open browser.') + + +def _render_report(payload): + lines = ['PairOfCleats map report', ''] + if not isinstance(payload, dict): + return '\n'.join(lines) + summary = payload.get('summary') or {} + counts = summary.get('counts') or {} + lines.append('files: {0}'.format(counts.get('files') or 0)) + lines.append('members: {0}'.format(counts.get('members') or 0)) + lines.append('edges: {0}'.format(counts.get('edges') or 0)) + warnings = payload.get('warnings') or [] + if warnings: + lines.append('') + lines.append('Warnings:') + for warning in warnings: + lines.append('- {0}'.format(warning)) + return '\n'.join(lines) + '\n' + + +def _offer_rebuild(window, warnings): + if not warnings or window is None: + return + needs = any( + 'dataflow metadata missing' in warning or 'controlFlow metadata missing' in warning + for warning in warnings + ) + if not needs: + return + + def on_select(index): + if index == 0: + window.run_command('pair_of_cleats_index_build_all') + + window.show_quick_panel( + ['Rebuild index with dataflow/control-flow enabled', 'Dismiss'], + on_select + ) + +def _prompt_map_type(window, settings, on_done): + default_type = map_lib.resolve_map_type(settings) + labels = [entry[1] for entry in MAP_TYPE_CHOICES] + selected_index = 0 + for idx, (value, _) in enumerate(MAP_TYPE_CHOICES): + if value == default_type: + selected_index = idx + break + + def on_select(index): + if index < 0: + return + on_done(MAP_TYPE_CHOICES[index][0]) + + window.show_quick_panel(labels, on_select, selected_index=selected_index) + + +def _prompt_map_format(window, settings, on_done): + default_format = map_lib.resolve_map_format(settings) + labels = [entry[1] for entry in MAP_FORMAT_CHOICES] + selected_index = 0 + for idx, (value, _) in enumerate(MAP_FORMAT_CHOICES): + if value == default_format: + selected_index = idx + break + + def on_select(index): + if index < 0: + return + on_done(MAP_FORMAT_CHOICES[index][0]) + + window.show_quick_panel(labels, on_select, selected_index=selected_index) + + +def _dispatch_map(window, scope, focus, map_type=None, map_format=None, path_hint=None): + settings = config.get_settings(window) + repo_root, reason = _resolve_repo_root(window, path_hint=path_hint) + if not repo_root: + ui.show_error('PairOfCleats: {0}'.format(reason)) + return + if reason: + ui.show_status('PairOfCleats: {0}'.format(reason)) + + errors = config.validate_settings(settings, repo_root) + if errors: + message = 'PairOfCleats settings need attention:\n- {0}'.format( + '\n- '.join(errors) + ) + ui.show_error(message) + return + + map_type = map_type or map_lib.resolve_map_type(settings) + map_format = map_format or map_lib.resolve_map_format(settings) + output_path, model_path, node_list_path = map_lib.build_output_paths( + repo_root, settings, scope, map_type, map_format + ) + args = 
map_lib.build_map_args( + repo_root, + settings, + scope, + focus, + map_type, + map_format, + output_path, + model_path, + node_list_path + ) + + cli = paths.resolve_cli(settings, repo_root) + command = cli['command'] + full_args = list(cli.get('args_prefix') or []) + args + env = config.build_env(settings) + + ui.show_status('PairOfCleats: generating map...') + + def on_done(result): + if result.returncode != 0: + message = result.output.strip() or 'PairOfCleats map failed.' + ui.show_error(message) + return + if result.error: + ui.show_error(result.error) + return + payload = result.payload + if not isinstance(payload, dict) or not payload.get('ok'): + ui.show_error('PairOfCleats map returned invalid JSON.') + return + + map_state.record_last_map(window, payload) + report_text = _render_report(payload) + if settings.get('map_show_report_panel'): + ui.write_output_panel(window, 'pairofcleats-map', report_text) + _offer_rebuild(window, payload.get('warnings') or []) + + resolved_path = payload.get('outPath') or output_path + resolved_format = payload.get('format') or map_format + + if resolved_format in ('html', 'html-iso', 'svg'): + _open_in_browser(resolved_path) + elif resolved_path: + window.open_file(resolved_path) + + runner.run_process( + command, + full_args, + cwd=repo_root, + env=env, + window=window, + title='PairOfCleats map', + capture_json=True, + on_done=on_done, + stream_output=settings.get('map_stream_output') is True, + panel_name='pairofcleats-map' + ) + + +def _run_with_options(window, scope, focus, map_type=None, map_format=None, path_hint=None): + settings = config.get_settings(window) + if not settings.get('map_prompt_options'): + _dispatch_map(window, scope, focus, map_type=map_type, map_format=map_format, path_hint=path_hint) + return + + def after_type(selected_type): + def after_format(selected_format): + _dispatch_map(window, scope, focus, map_type=selected_type, map_format=selected_format, path_hint=path_hint) + _prompt_map_format(window, settings, after_format) + + _prompt_map_type(window, settings, after_type) + + +class PairOfCleatsMapRepoCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + _run_with_options(self.window, 'repo', '', path_hint=None) + + +class PairOfCleatsMapCurrentFolderCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + view = self.window.active_view() + folder = None + if view and view.file_name(): + folder = os.path.dirname(view.file_name()) + if not folder and self.window.folders(): + folder = self.window.folders()[0] + repo_root, reason = _resolve_repo_root(self.window, path_hint=folder) + if not repo_root: + ui.show_error('PairOfCleats: {0}'.format(reason)) + return + focus = _relative_focus(repo_root, folder) if folder else '' + _run_with_options(self.window, 'dir', focus, path_hint=folder) + + +class PairOfCleatsMapCurrentFileCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + view = self.window.active_view() + return bool(view and view.file_name()) + + def is_visible(self): + return True + + def run(self): + view = self.window.active_view() + if not view or not view.file_name(): + ui.show_status('PairOfCleats: no active file.') + return + repo_root, reason = _resolve_repo_root(self.window, path_hint=view.file_name()) + if not repo_root: + ui.show_error('PairOfCleats: {0}'.format(reason)) + return + focus = _relative_focus(repo_root, view.file_name()) 
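+        # Scope the map to the active file; focus is its repo-relative path
+        # with forward slashes (see _relative_focus).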
+ _run_with_options(self.window, 'file', focus, path_hint=view.file_name()) + + +class PairOfCleatsMapSymbolUnderCursorCommand(sublime_plugin.TextCommand): + def is_enabled(self): + return bool(self.view and self.view.file_name()) + + def is_visible(self): + return True + + def run(self, edit): + symbol = _extract_symbol(self.view) + if not symbol: + ui.show_status('PairOfCleats: no symbol under cursor.') + return + file_name = self.view.file_name() if self.view else None + repo_root, reason = _resolve_repo_root(self.view.window(), path_hint=file_name) + if not repo_root: + ui.show_error('PairOfCleats: {0}'.format(reason)) + return + focus = '{0}::{1}'.format(_relative_focus(repo_root, file_name), symbol) if file_name else symbol + _run_with_options(self.view.window(), 'symbol', focus, path_hint=file_name) + + +class PairOfCleatsMapSelectionCommand(sublime_plugin.TextCommand): + def is_enabled(self): + return bool(self.view) + + def is_visible(self): + return True + + def run(self, edit): + selection = _extract_selection(self.view) + if not selection: + ui.show_status('PairOfCleats: no selection.') + return + file_name = self.view.file_name() if self.view else None + _run_with_options(self.view.window(), 'symbol', selection.strip(), path_hint=file_name) + + +class PairOfCleatsMapJumpToNodeCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + state = map_state.get_last_map(self.window) + if not state: + ui.show_status('PairOfCleats: no map history yet.') + return + node_list_path = state.get('nodeListPath') + if not node_list_path or not os.path.exists(node_list_path): + ui.show_status('PairOfCleats: node list unavailable.') + return + try: + with open(node_list_path, 'r') as handle: + payload = json.load(handle) + except Exception: + ui.show_error('PairOfCleats: failed to read node list.') + return + nodes = payload.get('nodes') if isinstance(payload, dict) else None + if not isinstance(nodes, list) or not nodes: + ui.show_status('PairOfCleats: node list empty.') + return + + items = [] + for node in nodes: + label = node.get('label') or node.get('id') + detail = node.get('file') or '' + items.append([label, detail]) + + repo_root, reason = _resolve_repo_root(self.window) + if not repo_root: + ui.show_error('PairOfCleats: {0}'.format(reason)) + return + + def on_select(index): + if index < 0: + return + node = nodes[index] + hit = { + 'file': node.get('file'), + 'startLine': node.get('startLine'), + 'endLine': node.get('endLine') + } + results.open_hit(self.window, hit, repo_root=repo_root) + + self.window.show_quick_panel(items, on_select) + + +class PairOfCleatsMapOpenLastViewerCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + state = map_state.get_last_map(self.window) + if not state: + ui.show_status('PairOfCleats: no map history yet.') + return + path_value = state.get('outPath') + if not path_value: + ui.show_status('PairOfCleats: no map output yet.') + return + format_value = state.get('format') or '' + if format_value in ('html', 'html-iso', 'svg'): + _open_in_browser(path_value) + else: + self.window.open_file(path_value) diff --git a/sublime/PairOfCleats/commands/search.py b/sublime/PairOfCleats/commands/search.py new file mode 100644 index 000000000..6b814fc48 --- /dev/null +++ b/sublime/PairOfCleats/commands/search.py @@ -0,0 +1,425 @@ +import sublime +import sublime_plugin + +from ..lib import config +from 
..lib import history +from ..lib import paths +from ..lib import results +from ..lib import runner +from ..lib import search as search_lib +from ..lib import ui + +LIMIT_CHOICES = [10, 25, 50, 100, 200] + + +def _resolve_repo_root(window): + return paths.resolve_repo_root(window, return_reason=True) + + +def _has_repo_root(window): + return paths.has_repo_root(window) + + +def _resolve_defaults(settings, overrides=None): + overrides = overrides or {} + mode = overrides.get('mode') or settings.get('index_mode_default') or 'both' + backend = overrides.get('backend') or settings.get('search_backend_default') or '' + limit = overrides.get('limit') or settings.get('search_limit') or 25 + return { + 'mode': mode, + 'backend': backend, + 'limit': limit + } + + +def _resolve_results_target(settings, hit_count): + target = settings.get('open_results_in') or 'quick_panel' + threshold = settings.get('results_buffer_threshold') + if target == 'quick_panel' and isinstance(threshold, int) and threshold > 0: + if hit_count >= threshold: + return 'output_panel' + if target in ('quick_panel', 'new_tab', 'output_panel'): + return target + return 'quick_panel' + + +def _prompt_query(window, initial, on_done): + window.show_input_panel( + 'PairOfCleats search query', + initial or '', + lambda value: on_done(value.strip()), + None, + None + ) + + +def _prompt_options(window, settings, defaults, on_done, force_prompt=False): + if not force_prompt and not settings.get('search_prompt_options'): + on_done(defaults) + return + + options = dict(defaults) + mode_choices = ['code', 'prose', 'both'] + default_mode = options.get('mode') + mode_index = mode_choices.index(default_mode) if default_mode in mode_choices else 2 + + def on_mode_select(index): + if index < 0: + on_done(options) + return + options['mode'] = mode_choices[index] + _prompt_backend(window, options, on_done) + + window.show_quick_panel( + mode_choices, + on_mode_select, + selected_index=mode_index + ) + + +def _prompt_backend(window, options, on_done): + backend_choices = [ + ('', 'auto'), + ('memory', 'memory'), + ('sqlite', 'sqlite'), + ('sqlite-fts', 'sqlite-fts'), + ('lmdb', 'lmdb') + ] + labels = ['backend: {0}'.format(label) for _, label in backend_choices] + current = options.get('backend') or '' + current_index = 0 + for idx, (value, _) in enumerate(backend_choices): + if value == current: + current_index = idx + break + + def on_backend_select(index): + if index < 0: + on_done(options) + return + options['backend'] = backend_choices[index][0] + _prompt_limit(window, options, on_done) + + window.show_quick_panel(labels, on_backend_select, selected_index=current_index) + + +def _prompt_limit(window, options, on_done): + limit_default = options.get('limit') + limit_values = [] + if isinstance(limit_default, int) and limit_default > 0: + limit_values.append(limit_default) + for value in LIMIT_CHOICES: + if value not in limit_values: + limit_values.append(value) + + choices = ['limit: {0}'.format(value) for value in limit_values] + choices.append('limit: custom') + + def on_limit_select(index): + if index < 0: + on_done(options) + return + if index < len(limit_values): + options['limit'] = limit_values[index] + on_done(options) + return + + def on_custom_done(value): + value = value.strip() + if not value: + on_done(options) + return + try: + parsed = int(value) + except Exception: + ui.show_error('Limit must be an integer.') + on_done(options) + return + if parsed < 1: + ui.show_error('Limit must be at least 1.') + on_done(options) + return + 
options['limit'] = parsed + on_done(options) + + window.show_input_panel( + 'PairOfCleats result limit', + str(limit_default or ''), + on_custom_done, + None, + None + ) + + window.show_quick_panel(choices, on_limit_select, selected_index=0) + + +def _execute_search(window, query, overrides=None, explain=False): + if not query: + return + + settings = config.get_settings(window) + repo_root, reason = _resolve_repo_root(window) + if not repo_root: + ui.show_error('PairOfCleats: {0}'.format(reason)) + return + if reason: + ui.show_status('PairOfCleats: {0}'.format(reason)) + + errors = config.validate_settings(settings, repo_root) + if errors: + message = 'PairOfCleats settings need attention:\n- {0}'.format( + '\n- '.join(errors) + ) + ui.show_error(message) + return + + resolved = _resolve_defaults(settings, overrides) + args = search_lib.build_search_args( + query, + repo_root=repo_root, + mode=resolved.get('mode'), + backend=resolved.get('backend') or None, + limit=resolved.get('limit'), + explain=explain + ) + + cli = paths.resolve_cli(settings, repo_root) + command = cli['command'] + full_args = list(cli.get('args_prefix') or []) + args + env = config.build_env(settings) + + ui.show_status('PairOfCleats: searching...') + + def on_done(result): + if result.returncode != 0: + message = result.output.strip() or 'PairOfCleats search failed.' + ui.show_error(message) + return + if result.error: + ui.show_error(result.error) + return + payload = result.payload + if not isinstance(payload, dict): + ui.show_error('PairOfCleats search returned invalid JSON.') + return + if payload.get('ok') is False: + ui.show_error(payload.get('message') or 'PairOfCleats search failed.') + return + + hits = results.collect_hits(payload) + history_limit = settings.get('history_limit') + history.record_query(window, query, resolved, history_limit) + + if explain: + text = results.format_explain_text(hits) + ui.write_output_panel(window, results.RESULTS_PANEL, text) + return + + if not hits: + ui.show_status('PairOfCleats: no results.') + return + + target = _resolve_results_target(settings, len(hits)) + if target == 'output_panel': + text = results.format_results_text(hits) + ui.write_output_panel(window, results.RESULTS_PANEL, text) + return + if target == 'new_tab': + text = results.format_results_text(hits) + results.open_results_view(window, text) + return + + items = [results.format_quick_panel_item(hit) for hit in hits] + + def on_select(index): + if index < 0: + return + results.open_hit(window, hits[index], repo_root) + + window.show_quick_panel(items, on_select) + + runner.run_process( + command, + full_args, + cwd=repo_root, + env=env, + window=window, + title='PairOfCleats search', + capture_json=True, + on_done=on_done, + stream_output=False + ) + + +def _extract_selection(view): + if view is None: + return '' + for region in view.sel(): + if not region.empty(): + return view.substr(region) + return '' + + +def _extract_symbol(view): + if view is None: + return '' + selection = view.sel() + if not selection: + return '' + region = selection[0] + word = view.word(region) + return view.substr(word) + + +def _search_with_query(window, query, overrides=None, force_prompt=False): + if not query: + ui.show_status('PairOfCleats: empty query.') + return + settings = config.get_settings(window) + defaults = _resolve_defaults(settings, overrides) + + def after_options(options): + _execute_search(window, query, options) + + _prompt_options(window, settings, defaults, after_options, force_prompt=force_prompt) + 
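+# _search_with_query above searches the given text (prompting for options only
+# when enabled); _search_with_prompt below asks for the query first, seeding
+# the input panel with the most recent history entry.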
+ +def _search_with_prompt(window, overrides=None, force_prompt=False): + settings = config.get_settings(window) + defaults = _resolve_defaults(settings, overrides) + last = history.get_last_query(window) + initial = last.get('query') if isinstance(last, dict) else '' + + def on_query(value): + if not value: + return + def after_options(options): + _execute_search(window, value, options) + _prompt_options(window, settings, defaults, after_options, force_prompt=force_prompt) + + _prompt_query(window, initial, on_query) + + +class PairOfCleatsSearchCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self, query=None): + if query: + _search_with_query(self.window, query) + return + _search_with_prompt(self.window) + + +class PairOfCleatsSearchWithOptionsCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self, query=None): + if query: + _search_with_query(self.window, query, force_prompt=True) + return + _search_with_prompt(self.window, force_prompt=True) + + +class PairOfCleatsSearchSelectionCommand(sublime_plugin.TextCommand): + def is_enabled(self): + return bool(self.view) + + def is_visible(self): + return True + + def run(self, edit): + query = _extract_selection(self.view) + if not query: + ui.show_status('PairOfCleats: no selection to search.') + return + _search_with_query(self.view.window(), query) + + +class PairOfCleatsSearchSymbolUnderCursorCommand(sublime_plugin.TextCommand): + def is_enabled(self): + return bool(self.view) + + def is_visible(self): + return True + + def run(self, edit): + query = _extract_symbol(self.view) + if not query: + ui.show_status('PairOfCleats: no symbol under cursor.') + return + _search_with_query(self.view.window(), query) + + +class PairOfCleatsSearchHistoryCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + entries = history.load_history(self.window) + if not entries: + ui.show_status('PairOfCleats: no history yet.') + return + + items = [] + for entry in entries: + query = entry.get('query') or '' + mode = entry.get('mode') or 'both' + backend = entry.get('backend') or 'auto' + limit = entry.get('limit') or '' + detail = 'mode {0} | backend {1} | limit {2}'.format(mode, backend, limit) + items.append([query, detail]) + + def on_select(index): + if index < 0: + return + entry = entries[index] + _execute_search(self.window, entry.get('query'), entry) + + self.window.show_quick_panel(items, on_select) + + +class PairOfCleatsRepeatLastSearchCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + entry = history.get_last_query(self.window) + if not entry: + ui.show_status('PairOfCleats: no previous search to repeat.') + return + _execute_search(self.window, entry.get('query'), entry) + + +class PairOfCleatsExplainSearchCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + entry = history.get_last_query(self.window) + if entry and entry.get('query'): + _execute_search(self.window, entry.get('query'), entry, explain=True) + return + + def on_query(value): + if not value: + return + _execute_search(self.window, value, explain=True) + + _prompt_query(self.window, '', on_query) diff --git a/sublime/PairOfCleats/commands/settings.py 
b/sublime/PairOfCleats/commands/settings.py new file mode 100644 index 000000000..60210abee --- /dev/null +++ b/sublime/PairOfCleats/commands/settings.py @@ -0,0 +1,19 @@ +import sublime +import sublime_plugin + + +class PairOfCleatsOpenSettingsCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + self.window.run_command( + 'edit_settings', + { + 'base_file': '${packages}/PairOfCleats/PairOfCleats.sublime-settings', + 'user_file': '${packages}/User/PairOfCleats.sublime-settings' + } + ) diff --git a/sublime/PairOfCleats/commands/validate.py b/sublime/PairOfCleats/commands/validate.py new file mode 100644 index 000000000..e3ae8c0e9 --- /dev/null +++ b/sublime/PairOfCleats/commands/validate.py @@ -0,0 +1,30 @@ +import sublime_plugin + +from ..lib import config +from ..lib import paths +from ..lib import ui + + +class PairOfCleatsValidateSettingsCommand(sublime_plugin.WindowCommand): + def is_enabled(self): + return True + + def is_visible(self): + return True + + def run(self): + settings = config.get_settings(self.window) + repo_root, reason = paths.resolve_repo_root(self.window, return_reason=True) + if not repo_root: + ui.show_error('PairOfCleats: {0}'.format(reason)) + return + if reason: + ui.show_status('PairOfCleats: {0}'.format(reason)) + errors = config.validate_settings(settings, repo_root) + if errors: + message = 'PairOfCleats settings need attention:\n- {0}'.format( + '\n- '.join(errors) + ) + ui.show_error(message) + return + ui.show_status('PairOfCleats settings look good.') diff --git a/sublime/PairOfCleats/lib/__init__.py b/sublime/PairOfCleats/lib/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/sublime/PairOfCleats/lib/api_client.py b/sublime/PairOfCleats/lib/api_client.py new file mode 100644 index 000000000..21e569ea0 --- /dev/null +++ b/sublime/PairOfCleats/lib/api_client.py @@ -0,0 +1,201 @@ +import json +import os +import urllib.parse +import urllib.request +import urllib.error + + +def normalize_base_url(value): + if not value: + return '' + value = str(value).strip() + if value.endswith('/'): + value = value[:-1] + return value + + +def build_url(base_url, path, params=None): + base_url = normalize_base_url(base_url) + if not base_url: + return '' + params = params or {} + filtered = {} + for key, value in params.items(): + if value is None or value == '': + continue + filtered[str(key)] = str(value) + query = urllib.parse.urlencode(filtered, doseq=True) + if query: + return '{0}{1}?{2}'.format(base_url, path, query) + return '{0}{1}'.format(base_url, path) + + +def _open_url(url, timeout_ms=5000): + timeout = float(timeout_ms or 5000) / 1000.0 + if timeout <= 0: + timeout = 5.0 + + try: + resp = urllib.request.urlopen(url, timeout=timeout) + try: + status = resp.getcode() or 0 + headers = dict(resp.headers.items()) + data = resp.read() + finally: + try: + resp.close() + except Exception: + pass + return status, headers, data + except urllib.error.HTTPError as err: + try: + data = err.read() + except Exception: + data = b'' + headers = dict(getattr(err, 'headers', {}).items()) if getattr(err, 'headers', None) else {} + status = getattr(err, 'code', 0) or 0 + return status, headers, data + + +def request_json(url, timeout_ms=5000): + status, headers, data = _open_url(url, timeout_ms=timeout_ms) + text = (data or b'').decode('utf-8', 'replace') + if status < 200 or status >= 300: + raise RuntimeError('API request failed ({0}): {1}'.format(status, text.strip() or 
url)) + try: + return json.loads(text or '{}'), headers + except Exception as exc: + raise RuntimeError('API returned invalid JSON: {0}'.format(exc)) + + +def request_text(url, timeout_ms=5000): + status, headers, data = _open_url(url, timeout_ms=timeout_ms) + text = (data or b'').decode('utf-8', 'replace') + if status < 200 or status >= 300: + raise RuntimeError('API request failed ({0}): {1}'.format(status, text.strip() or url)) + return text, headers + + +def _ensure_parent_dir(path_value): + if not path_value: + return + parent = os.path.dirname(path_value) + if not parent: + return + if os.path.isdir(parent): + return + try: + os.makedirs(parent) + except Exception: + pass + + +def _write_text(path_value, text): + _ensure_parent_dir(path_value) + with open(path_value, 'w') as handle: + handle.write(text or '') + + +def _write_json(path_value, payload): + _ensure_parent_dir(path_value) + with open(path_value, 'w') as handle: + json.dump(payload, handle, indent=2, sort_keys=True) + + +def generate_map_report( + base_url, + repo_root, + settings, + scope, + focus, + include, + map_format, + output_path, + model_path, + node_list_path): + base_url = normalize_base_url(base_url) + if not base_url: + raise RuntimeError('api_server_url is not set') + + timeout_ms = settings.get('api_timeout_ms') if isinstance(settings, dict) else None + if not isinstance(timeout_ms, int) or timeout_ms <= 0: + timeout_ms = 5000 + + params = { + 'repo': repo_root, + 'mode': settings.get('map_index_mode') or 'code', + 'scope': scope, + 'focus': focus, + 'include': include, + 'collapse': settings.get('map_collapse_default') or 'none' + } + + if settings.get('map_only_exported'): + params['onlyExported'] = '1' + + max_files = settings.get('map_max_files') + if isinstance(max_files, int) and max_files > 0: + params['maxFiles'] = str(max_files) + + max_members = settings.get('map_max_members_per_file') + if isinstance(max_members, int) and max_members > 0: + params['maxMembersPerFile'] = str(max_members) + + max_edges = settings.get('map_max_edges') + if isinstance(max_edges, int) and max_edges > 0: + params['maxEdges'] = str(max_edges) + + if settings.get('map_top_k_by_degree') is True: + params['topKByDegree'] = '1' + + open_uri = settings.get('map_open_uri_template') + if open_uri: + params['openUriTemplate'] = open_uri + + three_url = settings.get('map_three_url') + if three_url: + params['threeUrl'] = three_url + + # Viewer controls (only used by html-iso) + for setting_key, param_key in [ + ('map_wasd_sensitivity', 'wasdSensitivity'), + ('map_wasd_acceleration', 'wasdAcceleration'), + ('map_wasd_max_speed', 'wasdMaxSpeed'), + ('map_wasd_drag', 'wasdDrag'), + ('map_zoom_sensitivity', 'zoomSensitivity')]: + value = settings.get(setting_key) + if isinstance(value, (int, float)): + params[param_key] = str(value) + + model_url = build_url(base_url, '/map', dict(params, **{'format': 'json'})) + map_model, model_headers = request_json(model_url, timeout_ms=timeout_ms) + _write_json(model_path, map_model) + + nodes_url = build_url(base_url, '/map/nodes', params) + node_list, _headers = request_json(nodes_url, timeout_ms=timeout_ms) + _write_json(node_list_path, node_list) + + out_url = build_url(base_url, '/map', dict(params, **{'format': map_format})) + out_path = output_path + + if map_format in ('json', 'dot'): + text, _headers = request_text(out_url, timeout_ms=timeout_ms) + _write_text(output_path, text) + out_path = output_path + else: + out_path = out_url + + cache_key = 
model_headers.get('X-PairofCleats-Map-CacheKey') or '' + + return { + 'ok': True, + 'source': 'api', + 'repo': repo_root, + 'format': map_format, + 'outPath': out_path, + 'modelPath': model_path, + 'nodeListPath': node_list_path, + 'cacheKey': cache_key, + 'summary': map_model.get('summary') if isinstance(map_model, dict) else None, + 'warnings': map_model.get('warnings') if isinstance(map_model, dict) else None + } diff --git a/sublime/PairOfCleats/lib/config.py b/sublime/PairOfCleats/lib/config.py new file mode 100644 index 000000000..bb46e30ab --- /dev/null +++ b/sublime/PairOfCleats/lib/config.py @@ -0,0 +1,251 @@ +import os + +import sublime + +SETTINGS_FILE = 'PairOfCleats.sublime-settings' + +DEFAULT_SETTINGS = { + 'pairofcleats_path': '', + 'node_path': '', + 'index_mode_default': 'both', + 'search_backend_default': '', + 'open_results_in': 'quick_panel', + 'search_limit': 25, + 'results_buffer_threshold': 50, + 'history_limit': 25, + 'search_prompt_options': False, + 'index_watch_scope': 'repo', + 'index_watch_folder': '', + 'index_watch_mode': 'all', + 'index_watch_poll_ms': 2000, + 'index_watch_debounce_ms': 500, + 'map_type_default': 'combined', + 'map_format_default': 'html-iso', + 'map_prompt_options': False, + 'map_output_dir': '.pairofcleats/maps', + 'map_only_exported': False, + 'map_collapse_default': 'none', + 'map_max_files': 200, + 'map_max_members_per_file': 60, + 'map_max_edges': 3000, + 'map_top_k_by_degree': False, + 'map_show_report_panel': None, + 'map_stream_output': False, + 'map_open_uri_template': 'subl://open?file={file}&line={line}&column={column}', + 'map_three_url': '', + 'map_index_mode': 'code', + 'map_wasd_sensitivity': 16000, + 'map_wasd_acceleration': 6000, + 'map_wasd_max_speed': 24000, + 'map_wasd_drag': 6, + 'map_zoom_sensitivity': 0.1, + 'profile': '', + 'cache_root': '', + 'embeddings_mode': '', + 'node_options': '', + 'env': {} +} + +VALID_INDEX_MODES = {'code', 'prose', 'both'} +VALID_BACKENDS = {'memory', 'sqlite', 'sqlite-fts', 'lmdb'} +VALID_OPEN_TARGETS = {'quick_panel', 'new_tab', 'output_panel'} +VALID_WATCH_SCOPES = {'repo', 'folder'} +VALID_WATCH_MODES = {'all', 'code', 'prose', 'records', 'extracted-prose'} +VALID_MAP_TYPES = {'combined', 'imports', 'calls', 'usages', 'dataflow'} +VALID_MAP_FORMATS = {'json', 'dot', 'svg', 'html', 'html-iso'} +VALID_MAP_COLLAPSE = {'none', 'file', 'dir'} +VALID_MAP_MODES = {'code', 'prose'} + + +def prime_settings(): + try: + sublime.load_settings(SETTINGS_FILE) + except Exception: + pass + + +def get_settings(window=None): + base = _load_base_settings() + overrides = extract_project_settings(window) + return merge_settings(base, overrides) + + +def extract_project_settings(window): + if window is None: + return {} + data = window.project_data() or {} + settings = data.get('settings') if isinstance(data, dict) else {} + if not isinstance(settings, dict): + settings = {} + + override = settings.get('pairofcleats') or settings.get('PairOfCleats') + if override is None: + override = data.get('pairofcleats') or data.get('PairOfCleats') + if isinstance(override, dict): + return override + return {} + + +def merge_settings(base, overrides): + merged = dict(base) + for key, value in overrides.items(): + if key == 'env' and isinstance(value, dict): + env = dict(merged.get('env') or {}) + env.update(value) + merged['env'] = env + else: + merged[key] = value + return merged + + +def build_env(settings): + env = dict(os.environ) + extra = settings.get('env') or {} + if isinstance(extra, dict): + for key, value 
in extra.items(): + if key: + env[str(key)] = str(value) + + if settings.get('profile'): + env['PAIROFCLEATS_PROFILE'] = str(settings['profile']) + if settings.get('cache_root'): + env['PAIROFCLEATS_CACHE_ROOT'] = str(settings['cache_root']) + if settings.get('embeddings_mode'): + env['PAIROFCLEATS_EMBEDDINGS'] = str(settings['embeddings_mode']) + if settings.get('node_options'): + env['PAIROFCLEATS_NODE_OPTIONS'] = str(settings['node_options']) + return env + + +def validate_settings(settings, repo_root=None): + errors = [] + + mode = settings.get('index_mode_default') + if mode and mode not in VALID_INDEX_MODES: + errors.append( + 'index_mode_default must be one of: code, prose, both.' + ) + + backend = settings.get('search_backend_default') + if backend and backend not in VALID_BACKENDS: + errors.append( + 'search_backend_default must be one of: memory, sqlite, sqlite-fts, lmdb.' + ) + + target = settings.get('open_results_in') + if target and target not in VALID_OPEN_TARGETS: + errors.append( + 'open_results_in must be one of: quick_panel, new_tab, output_panel.' + ) + + env = settings.get('env') + if env is not None and not isinstance(env, dict): + errors.append('env must be a JSON object (dictionary).') + + cli_path = settings.get('pairofcleats_path') + if cli_path and (os.path.isabs(cli_path) or repo_root): + resolved = _resolve_path(repo_root, cli_path) + if resolved and not os.path.exists(resolved): + errors.append( + 'pairofcleats_path does not exist: {0}'.format(resolved) + ) + + node_path = settings.get('node_path') + if node_path and os.path.isabs(node_path): + if not os.path.exists(node_path): + errors.append( + 'node_path does not exist: {0}'.format(node_path) + ) + + _validate_int_setting(errors, settings, 'search_limit', allow_zero=False) + _validate_int_setting(errors, settings, 'results_buffer_threshold', allow_zero=True) + _validate_int_setting(errors, settings, 'history_limit', allow_zero=True) + _validate_int_setting(errors, settings, 'index_watch_poll_ms', allow_zero=False) + _validate_int_setting(errors, settings, 'index_watch_debounce_ms', allow_zero=False) + + watch_scope = settings.get('index_watch_scope') + if watch_scope and watch_scope not in VALID_WATCH_SCOPES: + errors.append('index_watch_scope must be repo or folder.') + + watch_mode = settings.get('index_watch_mode') + if watch_mode and watch_mode not in VALID_WATCH_MODES: + errors.append('index_watch_mode must be one of: all, code, prose, records, extracted-prose.') + + watch_folder = settings.get('index_watch_folder') + if watch_folder and (os.path.isabs(watch_folder) or repo_root): + resolved = _resolve_path(repo_root, watch_folder) + if resolved and not os.path.exists(resolved): + errors.append('index_watch_folder does not exist: {0}'.format(resolved)) + + map_type = settings.get('map_type_default') + if map_type and map_type not in VALID_MAP_TYPES: + errors.append('map_type_default must be one of: combined, imports, calls, usages, dataflow.') + + map_format = settings.get('map_format_default') + if map_format and map_format not in VALID_MAP_FORMATS: + errors.append('map_format_default must be one of: json, dot, svg, html, html-iso.') + + map_collapse = settings.get('map_collapse_default') + if map_collapse and map_collapse not in VALID_MAP_COLLAPSE: + errors.append('map_collapse_default must be one of: none, file, dir.') + + map_mode = settings.get('map_index_mode') + if map_mode and map_mode not in VALID_MAP_MODES: + errors.append('map_index_mode must be code or prose.') + + 
_validate_int_setting(errors, settings, 'map_max_files', allow_zero=False) + _validate_int_setting(errors, settings, 'map_max_members_per_file', allow_zero=False) + _validate_int_setting(errors, settings, 'map_max_edges', allow_zero=False) + _validate_number_setting(errors, settings, 'map_wasd_sensitivity', allow_zero=False) + _validate_number_setting(errors, settings, 'map_wasd_acceleration', allow_zero=False) + _validate_number_setting(errors, settings, 'map_wasd_max_speed', allow_zero=False) + _validate_number_setting(errors, settings, 'map_wasd_drag', allow_zero=False) + _validate_number_setting(errors, settings, 'map_zoom_sensitivity', allow_zero=False) + + return errors + + +def _load_base_settings(): + settings = sublime.load_settings(SETTINGS_FILE) + values = dict(DEFAULT_SETTINGS) + for key in DEFAULT_SETTINGS: + values[key] = settings.get(key, DEFAULT_SETTINGS[key]) + return values + + +def _resolve_path(repo_root, raw_path): + if not raw_path: + return None + if os.path.isabs(raw_path): + return raw_path + if repo_root: + return os.path.join(repo_root, raw_path) + return raw_path + + +def _validate_int_setting(errors, settings, key, allow_zero=False): + value = settings.get(key) + if value is None or value == '': + return + if isinstance(value, bool) or not isinstance(value, int): + errors.append('{0} must be an integer.'.format(key)) + return + if allow_zero: + if value < 0: + errors.append('{0} must be 0 or higher.'.format(key)) + elif value < 1: + errors.append('{0} must be 1 or higher.'.format(key)) + + +def _validate_number_setting(errors, settings, key, allow_zero=False): + value = settings.get(key) + if value is None or value == '': + return + if isinstance(value, bool) or not isinstance(value, (int, float)): + errors.append('{0} must be a number.'.format(key)) + return + if allow_zero: + if value < 0: + errors.append('{0} must be 0 or higher.'.format(key)) + elif value <= 0: + errors.append('{0} must be greater than 0.'.format(key)) diff --git a/sublime/PairOfCleats/lib/history.py b/sublime/PairOfCleats/lib/history.py new file mode 100644 index 000000000..c47c7e8d0 --- /dev/null +++ b/sublime/PairOfCleats/lib/history.py @@ -0,0 +1,69 @@ +def load_history(window): + if window is None: + return [] + _, state = _load_state(window) + history = state.get('history') + if isinstance(history, list): + return list(history) + return [] + + +def get_last_query(window): + if window is None: + return None + _, state = _load_state(window) + last = state.get('last_search') + if isinstance(last, dict) and last.get('query'): + return dict(last) + history = state.get('history') + if isinstance(history, list) and history: + entry = history[0] + if isinstance(entry, dict) and entry.get('query'): + return dict(entry) + return None + + +def record_query(window, query, options, limit): + if window is None or not query: + return + data, state = _load_state(window) + history = state.get('history') + if not isinstance(history, list): + history = [] + entry = _build_entry(query, options) + history = [item for item in history if not _matches_entry(item, entry)] + history.insert(0, entry) + if isinstance(limit, int) and limit > 0: + history = history[:limit] + state['history'] = history + state['last_search'] = entry + data['pairofcleats_state'] = state + window.set_project_data(data) + + +def _load_state(window): + data = window.project_data() or {} + state = data.get('pairofcleats_state') + if not isinstance(state, dict): + state = {} + return data, state + + +def _build_entry(query, options): + 
entry = { + 'query': query + } + if isinstance(options, dict): + for key in ('mode', 'backend', 'limit'): + if key in options: + entry[key] = options.get(key) + return entry + + +def _matches_entry(existing, target): + if not isinstance(existing, dict): + return False + for key in ('query', 'mode', 'backend', 'limit'): + if existing.get(key) != target.get(key): + return False + return True diff --git a/sublime/PairOfCleats/lib/index_state.py b/sublime/PairOfCleats/lib/index_state.py new file mode 100644 index 000000000..f04e24dfe --- /dev/null +++ b/sublime/PairOfCleats/lib/index_state.py @@ -0,0 +1,33 @@ +import datetime + + +def record_last_build(window, mode): + if window is None: + return None + timestamp = datetime.datetime.utcnow().replace(microsecond=0).isoformat() + 'Z' + data = window.project_data() or {} + state = data.get('pairofcleats_state') + if not isinstance(state, dict): + state = {} + index_state = state.get('index') + if not isinstance(index_state, dict): + index_state = {} + index_state['last_mode'] = mode + index_state['last_time'] = timestamp + state['index'] = index_state + data['pairofcleats_state'] = state + window.set_project_data(data) + return index_state + + +def get_last_build(window): + if window is None: + return None + data = window.project_data() or {} + state = data.get('pairofcleats_state') + if not isinstance(state, dict): + return None + index_state = state.get('index') + if isinstance(index_state, dict): + return dict(index_state) + return None diff --git a/sublime/PairOfCleats/lib/indexing.py b/sublime/PairOfCleats/lib/indexing.py new file mode 100644 index 000000000..be1eb6328 --- /dev/null +++ b/sublime/PairOfCleats/lib/indexing.py @@ -0,0 +1,32 @@ +def build_index_args(mode, repo_root=None, watch=False, watch_poll_ms=None, watch_debounce_ms=None): + args = ['index', 'watch' if watch else 'build'] + if mode: + args.extend(['--mode', mode]) + if watch: + if watch_poll_ms is not None: + args.extend(['--watch-poll', str(watch_poll_ms)]) + if watch_debounce_ms is not None: + args.extend(['--watch-debounce', str(watch_debounce_ms)]) + if repo_root: + args.extend(['--repo', repo_root]) + return args + + +def build_validate_args(repo_root=None, modes=None, json_output=True): + args = ['index', 'validate'] + if json_output: + args.append('--json') + if modes: + args.extend(['--mode', modes]) + if repo_root: + args.extend(['--repo', repo_root]) + return args + + +def build_config_dump_args(repo_root=None, json_output=True): + args = ['config', 'dump'] + if json_output: + args.append('--json') + if repo_root: + args.extend(['--repo', repo_root]) + return args diff --git a/sublime/PairOfCleats/lib/map.py b/sublime/PairOfCleats/lib/map.py new file mode 100644 index 000000000..e876d87e4 --- /dev/null +++ b/sublime/PairOfCleats/lib/map.py @@ -0,0 +1,133 @@ +import os +import time + + +MAP_TYPES = { + 'imports': 'imports', + 'calls': 'calls', + 'usages': 'usages', + 'dataflow': 'dataflow,aliases', + 'combined': 'imports,calls,usages,dataflow,exports' +} + +MAP_FORMATS = { + 'json': '.json', + 'dot': '.dot', + 'svg': '.svg', + 'html': '.html', + 'html-iso': '.iso.html' +} + + +def resolve_output_dir(repo_root, settings): + output_dir = settings.get('map_output_dir') or '.pairofcleats/maps' + if os.path.isabs(output_dir): + return output_dir + return os.path.normpath(os.path.join(repo_root, output_dir)) + + +def build_output_paths(repo_root, settings, scope, map_type, map_format): + output_dir = resolve_output_dir(repo_root, settings) + timestamp = 
time.strftime('%Y%m%d-%H%M%S') + safe_scope = (scope or 'repo').replace(' ', '_') + safe_type = (map_type or 'combined').replace(' ', '_') + base = 'map_{0}_{1}_{2}'.format(safe_scope, safe_type, timestamp) + + extension = MAP_FORMATS.get(map_format, '.json') + output_path = os.path.join(output_dir, base + extension) + model_path = os.path.join(output_dir, base + '.model.json') + node_list_path = os.path.join(output_dir, base + '.nodes.json') + return output_path, model_path, node_list_path + + +def resolve_map_type(settings, override=None): + if override: + return override + return settings.get('map_type_default') or 'combined' + + +def resolve_map_format(settings, override=None): + if override: + return override + return settings.get('map_format_default') or 'html-iso' + + +def build_map_args( + repo_root, + settings, + scope, + focus, + map_type, + map_format, + output_path, + model_path, + node_list_path): + args = ['report', 'map', '--repo', repo_root] + + mode = settings.get('map_index_mode') or 'code' + args += ['--mode', mode] + + args += ['--scope', scope] + if focus: + args += ['--focus', focus] + + include = MAP_TYPES.get(map_type) + if include: + args += ['--include', include] + + if settings.get('map_only_exported'): + args.append('--only-exported') + + collapse = settings.get('map_collapse_default') + if collapse: + args += ['--collapse', collapse] + + max_files = settings.get('map_max_files') + if isinstance(max_files, int) and max_files > 0: + args += ['--max-files', str(max_files)] + + max_members = settings.get('map_max_members_per_file') + if isinstance(max_members, int) and max_members > 0: + args += ['--max-members-per-file', str(max_members)] + + max_edges = settings.get('map_max_edges') + if isinstance(max_edges, int) and max_edges > 0: + args += ['--max-edges', str(max_edges)] + + if settings.get('map_top_k_by_degree') is True: + args.append('--top-k-by-degree') + + if map_format: + args += ['--format', map_format] + + if output_path: + args += ['--out', output_path] + + if model_path: + args += ['--model-out', model_path] + + if node_list_path: + args += ['--node-list-out', node_list_path] + + open_uri = settings.get('map_open_uri_template') + if open_uri: + args += ['--open-uri-template', open_uri] + + three_url = settings.get('map_three_url') + if three_url: + args += ['--three-url', three_url] + + _append_number(args, settings, 'map_wasd_sensitivity', '--wasd-sensitivity') + _append_number(args, settings, 'map_wasd_acceleration', '--wasd-acceleration') + _append_number(args, settings, 'map_wasd_max_speed', '--wasd-max-speed') + _append_number(args, settings, 'map_wasd_drag', '--wasd-drag') + _append_number(args, settings, 'map_zoom_sensitivity', '--zoom-sensitivity') + + args.append('--json') + return args + + +def _append_number(args, settings, key, flag): + value = settings.get(key) + if isinstance(value, (int, float)): + args += [flag, str(value)] diff --git a/sublime/PairOfCleats/lib/map_state.py b/sublime/PairOfCleats/lib/map_state.py new file mode 100644 index 000000000..501aa349c --- /dev/null +++ b/sublime/PairOfCleats/lib/map_state.py @@ -0,0 +1,25 @@ +def get_last_map(window): + if window is None: + return None + _, state = _load_state(window) + entry = state.get('last_map') + if isinstance(entry, dict): + return dict(entry) + return None + + +def record_last_map(window, payload): + if window is None or not isinstance(payload, dict): + return + data, state = _load_state(window) + state['last_map'] = dict(payload) + data['pairofcleats_state'] = state + 
window.set_project_data(data) + + +def _load_state(window): + data = window.project_data() or {} + state = data.get('pairofcleats_state') + if not isinstance(state, dict): + state = {} + return data, state diff --git a/sublime/PairOfCleats/lib/paths.py b/sublime/PairOfCleats/lib/paths.py new file mode 100644 index 000000000..e250a8c8b --- /dev/null +++ b/sublime/PairOfCleats/lib/paths.py @@ -0,0 +1,159 @@ +import os + + +def find_repo_root(start_path): + if not start_path: + return None + + path = start_path + if os.path.isfile(path): + path = os.path.dirname(path) + path = os.path.abspath(path) + + while True: + if os.path.isfile(os.path.join(path, '.pairofcleats.json')): + return path + if os.path.isdir(os.path.join(path, '.git')): + return path + + parent = os.path.dirname(path) + if parent == path: + break + path = parent + + return None + + +def resolve_repo_root(window, return_reason=False, path_hint=None): + root, reason = _resolve_repo_root(window, path_hint=path_hint) + if return_reason: + return root, reason + return root + + +def has_repo_root(window, path_hint=None): + root, _ = resolve_repo_root(window, return_reason=True, path_hint=path_hint) + return root is not None + + +def _resolve_repo_root(window, path_hint=None): + if window is None: + return None, 'No active window.' + + hint_root = None + if path_hint: + hint_path = path_hint + if os.path.isfile(hint_path): + hint_path = os.path.dirname(hint_path) + if hint_path: + root = find_repo_root(hint_path) + if root: + return root, None + hint_root = os.path.abspath(hint_path) + + candidates = [] + active_file = None + folders = window.folders() or [] + folders = sorted([os.path.abspath(folder) for folder in folders if folder]) + if folders: + candidates.extend(folders) + else: + view = window.active_view() + active_file = view.file_name() if view else None + if active_file: + candidates.append(active_file) + + for candidate in candidates: + root = find_repo_root(candidate) + if root: + return root, None + + if hint_root: + return hint_root, 'Repo root not found; using hint path.' + if folders: + return folders[0], 'Repo root not found; using open folder.' + if active_file: + return os.path.dirname(active_file), 'Repo root not found; using active file folder.' + + if candidates: + return None, 'Repo root not found. Open a folder with .pairofcleats.json or .git.' + + return None, 'No folders are open. Add a folder or project to enable PairOfCleats.' 
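+# Resolution order: explicit path hint, then open folders (sorted), then the
+# active file. Each candidate walks upward via find_repo_root looking for a
+# .pairofcleats.json marker or a .git directory; the reason string is only
+# set when a fallback root is used.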
+ + +def resolve_watch_root(window, settings): + repo_root, _ = resolve_repo_root(window, return_reason=True) + scope = (settings.get('index_watch_scope') or 'repo').strip().lower() + folder_override = settings.get('index_watch_folder') or '' + if scope == 'folder': + if folder_override: + resolved = resolve_path(repo_root, folder_override) + if resolved: + return resolved + folders = window.folders() if window is not None else [] + if folders: + return folders[0] + return repo_root + + +def resolve_cli(settings, repo_root): + node_path = settings.get('node_path') or 'node' + configured = (settings.get('pairofcleats_path') or '').strip() + if configured: + resolved = resolve_path(repo_root, configured) + return _cli_for_path(resolved, node_path, 'settings') + + local_bin = _find_local_binary(repo_root) + if local_bin: + return _cli_for_path(local_bin, node_path, 'node_modules') + + if repo_root: + local_js = os.path.join(repo_root, 'bin', 'pairofcleats.js') + if os.path.exists(local_js): + return _cli_for_path(local_js, node_path, 'repo-bin') + + return { + 'command': 'pairofcleats', + 'args_prefix': [], + 'source': 'path' + } + + +def resolve_path(repo_root, value): + if not value: + return None + if os.path.isabs(value): + return value + if repo_root: + return os.path.normpath(os.path.join(repo_root, value)) + return value + + +def _find_local_binary(repo_root): + if not repo_root: + return None + bin_dir = os.path.join(repo_root, 'node_modules', '.bin') + candidates = [ + 'pairofcleats', + 'pairofcleats.cmd', + 'pairofcleats.ps1' + ] + for name in candidates: + candidate = os.path.join(bin_dir, name) + if os.path.exists(candidate): + return candidate + return None + + +def _cli_for_path(path_value, node_path, source): + if path_value and path_value.lower().endswith('.js'): + return { + 'command': node_path or 'node', + 'args_prefix': [path_value], + 'source': source + } + return { + 'command': path_value, + 'args_prefix': [], + 'source': source + } diff --git a/sublime/PairOfCleats/lib/results.py b/sublime/PairOfCleats/lib/results.py new file mode 100644 index 000000000..7c4174114 --- /dev/null +++ b/sublime/PairOfCleats/lib/results.py @@ -0,0 +1,181 @@ +import os + +import sublime + +HIGHLIGHT_KEY = 'pairofcleats.search.highlight' +HIGHLIGHT_SCOPE = 'region.yellowish' +RESULTS_PANEL = 'pairofcleats-results' + + +def collect_hits(payload): + hits = [] + if not isinstance(payload, dict): + return hits + + def add(section, items): + if not isinstance(items, list): + return + for hit in items: + if not isinstance(hit, dict): + continue + merged = dict(hit) + merged['section'] = section + hits.append(merged) + + add('code', payload.get('code')) + add('prose', payload.get('prose')) + add('extracted-prose', payload.get('extractedProse')) + add('records', payload.get('records')) + return hits + + +def format_quick_panel_item(hit): + file_label = format_file_label(hit) + score_label = format_score_label(hit) + section = hit.get('section') or '' + name = hit.get('name') or hit.get('symbol') or '' + headline = hit.get('headline') or hit.get('preview') or '' + + label = name or headline or file_label + detail_parts = [file_label] + if section: + detail_parts.append(section) + if score_label: + detail_parts.append(score_label) + detail = ' | '.join([part for part in detail_parts if part]) + + if headline and headline != label: + return [label, detail, headline] + return [label, detail] + + +def format_results_text(hits): + lines = ['PairOfCleats results ({0})'.format(len(hits)), ''] + for idx, hit 
in enumerate(hits, start=1): + file_label = format_file_label(hit) + section = hit.get('section') or '' + score_label = format_score_label(hit) + name = hit.get('name') or hit.get('symbol') or '' + headline = hit.get('headline') or hit.get('preview') or '' + + header_parts = ['{0}.'.format(idx), file_label] + if section: + header_parts.append('[{0}]'.format(section)) + if score_label: + header_parts.append(score_label) + lines.append(' '.join([part for part in header_parts if part])) + + if name: + lines.append(' {0}'.format(name)) + if headline and headline != name: + lines.append(' {0}'.format(headline)) + lines.append('') + return '\n'.join(lines).rstrip() + '\n' + + +def format_explain_text(hits): + lines = ['PairOfCleats explain ({0})'.format(len(hits)), ''] + for idx, hit in enumerate(hits, start=1): + file_label = format_file_label(hit) + section = hit.get('section') or '' + score_label = format_score_label(hit) + lines.append('{0}. {1}'.format(idx, file_label)) + if section or score_label: + detail = ' '.join([part for part in [section, score_label] if part]) + if detail: + lines.append(' {0}'.format(detail)) + + breakdown = hit.get('scoreBreakdown') + if isinstance(breakdown, dict) and breakdown: + for key in sorted(breakdown.keys()): + value = breakdown[key] + lines.append(' {0}: {1}'.format(key, value)) + else: + lines.append(' (no score breakdown)') + lines.append('') + return '\n'.join(lines).rstrip() + '\n' + + +def open_hit(window, hit, repo_root=None): + file_path = resolve_hit_path(hit, repo_root) + if not file_path: + return None + + start_line = hit.get('startLine') + encoded_path = file_path + if isinstance(start_line, int) and start_line > 0: + encoded_path = '{0}:{1}'.format(file_path, start_line) + + view = window.open_file(encoded_path, sublime.ENCODED_POSITION) + highlight_hit(view, hit) + return view + + +def open_results_view(window, text): + if window is None: + return None + view = window.new_file() + view.set_name('PairOfCleats Results') + view.set_scratch(True) + view.set_read_only(False) + view.run_command('append', {'characters': text, 'force': True}) + view.set_read_only(True) + return view + + +def resolve_hit_path(hit, repo_root): + if not isinstance(hit, dict): + return None + file_path = hit.get('file') + if not file_path: + return None + if os.path.isabs(file_path): + return file_path + if repo_root: + return os.path.join(repo_root, file_path) + return file_path + + +def highlight_hit(view, hit): + if view is None or not isinstance(hit, dict): + return + start_line = hit.get('startLine') + end_line = hit.get('endLine') or start_line + if not isinstance(start_line, int) or start_line <= 0: + return + if not isinstance(end_line, int) or end_line <= 0: + end_line = start_line + + def apply(): + if view.is_loading(): + sublime.set_timeout(apply, 10) + return + view.erase_regions(HIGHLIGHT_KEY) + start_pt = view.text_point(start_line - 1, 0) + end_pt = view.text_point(end_line - 1, 0) + region = view.full_line(sublime.Region(start_pt, end_pt)) + view.add_regions(HIGHLIGHT_KEY, [region], HIGHLIGHT_SCOPE, flags=0) + + sublime.set_timeout(apply, 0) + + +def format_file_label(hit): + file_path = hit.get('file') or '' + start_line = hit.get('startLine') + end_line = hit.get('endLine') + if isinstance(start_line, int) and start_line > 0: + if isinstance(end_line, int) and end_line > start_line: + return '{0}:{1}-{2}'.format(file_path, start_line, end_line) + return '{0}:{1}'.format(file_path, start_line) + return file_path + + +def format_score_label(hit): + 
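+    # Produce a "score 1.23 bm25"-style label from the numeric score plus optional scoreType; returns '' when the score is not numeric.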
score = hit.get('score') + score_type = hit.get('scoreType') or '' + if isinstance(score, (int, float)): + label = '{0:.2f}'.format(score) + if score_type: + label = '{0} {1}'.format(label, score_type) + return 'score {0}'.format(label) + return '' diff --git a/sublime/PairOfCleats/lib/runner.py b/sublime/PairOfCleats/lib/runner.py new file mode 100644 index 000000000..c45eaecc6 --- /dev/null +++ b/sublime/PairOfCleats/lib/runner.py @@ -0,0 +1,121 @@ +import json +import os +import subprocess +import threading + +import sublime + + +class ProcessResult(object): + def __init__(self, returncode, output, payload=None, error=None): + self.returncode = returncode + self.output = output + self.payload = payload + self.error = error + + +class ProcessHandle(object): + def __init__(self, process, thread, on_cancel=None): + self.process = process + self.thread = thread + self._on_cancel = on_cancel + + def cancel(self): + if self.process.poll() is not None: + return + try: + self.process.terminate() + except Exception: + pass + if self._on_cancel: + self._on_cancel() + timer = threading.Timer(1.5, self._kill_if_running) + timer.daemon = True + timer.start() + + def _kill_if_running(self): + if self.process.poll() is not None: + return + try: + self.process.kill() + except Exception: + pass + + +def run_process(command, args, cwd=None, env=None, window=None, title='PairOfCleats', + capture_json=False, on_done=None, stream_output=True, + panel_name='pairofcleats'): + if window is None: + window = sublime.active_window() + panel = None + if stream_output: + panel = _ensure_panel(window, panel_name) + _show_panel(window, panel_name) + + full_env = dict(os.environ) + if env: + full_env.update(env) + + proc = subprocess.Popen( + [command] + list(args), + cwd=cwd or None, + env=full_env, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + universal_newlines=True + ) + + output_lines = [] + + def append_line(line): + output_lines.append(line) + if panel is not None: + _append_panel(panel, line) + + def done_callback(result): + if on_done: + on_done(result) + + def worker(): + try: + for line in proc.stdout: + append_line(line) + finally: + proc.wait() + + output = ''.join(output_lines) + payload = None + error = None + if capture_json: + try: + payload = json.loads(output or '{}') + except Exception as exc: + error = 'Failed to parse JSON output: {0}'.format(exc) + result = ProcessResult(proc.returncode, output, payload=payload, error=error) + sublime.set_timeout(lambda: done_callback(result), 0) + + thread = threading.Thread(target=worker) + thread.daemon = True + thread.start() + + return ProcessHandle(proc, thread) + + +def _ensure_panel(window, name): + panel = window.create_output_panel(name) + panel.set_read_only(False) + return panel + + +def _show_panel(window, name): + window.run_command('show_panel', {'panel': 'output.{0}'.format(name)}) + + +def _append_panel(panel, text): + def append(): + panel.run_command('append', { + 'characters': text, + 'force': True, + 'scroll_to_end': True + }) + sublime.set_timeout(append, 0) diff --git a/sublime/PairOfCleats/lib/search.py b/sublime/PairOfCleats/lib/search.py new file mode 100644 index 000000000..b4f98f561 --- /dev/null +++ b/sublime/PairOfCleats/lib/search.py @@ -0,0 +1,13 @@ +def build_search_args(query, repo_root=None, mode=None, backend=None, limit=None, explain=False): + args = ['search', query, '--json'] + if mode and mode != 'both': + args.extend(['--mode', mode]) + if backend: + args.extend(['--backend', backend]) + if limit: + 
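+        # A falsy limit (0 or None) skips --top so the CLI default result count applies.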
args.extend(['--top', str(limit)]) + if explain: + args.append('--explain') + if repo_root: + args.extend(['--repo', repo_root]) + return args diff --git a/sublime/PairOfCleats/lib/ui.py b/sublime/PairOfCleats/lib/ui.py new file mode 100644 index 000000000..a2ba08db2 --- /dev/null +++ b/sublime/PairOfCleats/lib/ui.py @@ -0,0 +1,35 @@ +import sublime + + +def show_error(message): + try: + sublime.error_message(message) + except Exception: + print(message) + + +def show_status(message): + try: + sublime.status_message(message) + except Exception: + print(message) + + +def write_output_panel(window, name, text): + if window is None: + window = sublime.active_window() + if window is None: + return None + + panel = window.create_output_panel(name) + panel.set_read_only(False) + panel.run_command('select_all') + panel.run_command('right_delete') + panel.run_command('append', { + 'characters': text, + 'force': True, + 'scroll_to_end': False + }) + panel.set_read_only(True) + window.run_command('show_panel', {'panel': 'output.{0}'.format(name)}) + return panel diff --git a/sublime/PairOfCleats/lib/watch.py b/sublime/PairOfCleats/lib/watch.py new file mode 100644 index 000000000..13c9b1d4c --- /dev/null +++ b/sublime/PairOfCleats/lib/watch.py @@ -0,0 +1,71 @@ +_WATCHERS = {} + + +def _window_key(window): + if window is None: + return 'global' + try: + return str(window.id()) + except Exception: + return 'global' + + +def register(window, handle, root): + key = _window_key(window) + _WATCHERS[key] = { + 'handle': handle, + 'root': root + } + + +def is_running(window): + key = _window_key(window) + entry = _WATCHERS.get(key) + if not entry: + return False + handle = entry.get('handle') + process = getattr(handle, 'process', None) + if process is None: + return False + return process.poll() is None + + +def stop(window): + key = _window_key(window) + entry = _WATCHERS.pop(key, None) + if not entry: + return False + handle = entry.get('handle') + if handle: + handle.cancel() + return True + + +def stop_all(): + keys = list(_WATCHERS.keys()) + for key in keys: + entry = _WATCHERS.pop(key, None) + if not entry: + continue + handle = entry.get('handle') + if handle: + handle.cancel() + + +def clear_if_done(window): + key = _window_key(window) + entry = _WATCHERS.get(key) + if not entry: + return + handle = entry.get('handle') + process = getattr(handle, 'process', None) + if process is None or process.poll() is not None: + _WATCHERS.pop(key, None) + + +def current_root(window): + key = _window_key(window) + entry = _WATCHERS.get(key) + if not entry: + return None + return entry.get('root') diff --git a/sublime/PairOfCleats/messages/install.txt b/sublime/PairOfCleats/messages/install.txt new file mode 100644 index 000000000..ae510cdde --- /dev/null +++ b/sublime/PairOfCleats/messages/install.txt @@ -0,0 +1,7 @@ +PairOfCleats Sublime Text + +Install requirements: +- Node.js 18+ available on PATH (or set node_path in settings) +- PairOfCleats CLI available via npm or a local repo checkout + +Open the settings command palette entry: "PairOfCleats: Open Settings" to configure the CLI path and environment overrides. 
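+
+To confirm the plugin can reach the CLI, one quick manual check (paths illustrative, assuming a repo checkout) is to run a search directly:
+
+    node <repo>/bin/pairofcleats.js search "example" --json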
\ No newline at end of file diff --git a/sublime/PairOfCleats/plugin.py b/sublime/PairOfCleats/plugin.py new file mode 100644 index 000000000..389555c97 --- /dev/null +++ b/sublime/PairOfCleats/plugin.py @@ -0,0 +1,33 @@ +import sublime +import sublime_plugin + +from .lib import config +from .lib import watch +from .commands import index as _index_commands +from .commands import map as _map_commands +from .commands import search as _search_commands +from .commands import settings as _settings_commands +from .commands import validate as _validate_commands + +PLUGIN_NAME = 'PairOfCleats' + + +def plugin_loaded(): + config.prime_settings() + + +def plugin_unloaded(): + watch.stop_all() + + +class PairOfCleatsWindowListener(sublime_plugin.EventListener): + def on_window_command(self, window, command_name, args): + if command_name == 'close_window': + watch.stop(window) + + def on_post_window_command(self, window, command_name, args): + if command_name == 'close_window': + watch.stop(window) + + def on_exit(self): + watch.stop_all() diff --git a/tests/all.js b/tests/all.js index 3ba3c7085..09f77f4a4 100644 --- a/tests/all.js +++ b/tests/all.js @@ -1,12 +1,17 @@ #!/usr/bin/env node import path from 'node:path'; import { spawnSync } from 'node:child_process'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['skip-bench', 'skip-script-coverage'], - default: { 'skip-bench': false, 'skip-script-coverage': false } -}); +const argv = createCli({ + scriptName: 'test-all', + options: { + 'skip-bench': { type: 'boolean', default: false }, + 'skip-script-coverage': { type: 'boolean', default: false }, + retries: { type: 'number', default: 2 }, + 'log-dir': { type: 'string', default: '' } + } +}).parse(); const envSkipBench = process.env.PAIROFCLEATS_SKIP_BENCH === 'true' || process.env.PAIROFCLEATS_SKIP_BENCH === '1' @@ -29,7 +34,15 @@ const run = (label, args) => { }; if (!skipScriptCoverage) { - run('script-coverage-test', [path.join(root, 'tests', 'script-coverage.js')]); + const args = [path.join(root, 'tests', 'script-coverage.js')]; + const passRetries = process.argv.some((arg) => arg === '--retries' || arg.startsWith('--retries=')); + if (passRetries) { + args.push('--retries', String(argv.retries)); + } + if (argv['log-dir']) { + args.push('--log-dir', argv['log-dir']); + } + run('script-coverage-test', args); } if (!skipBench) { diff --git a/tests/api-server-stream.js b/tests/api-server-stream.js new file mode 100644 index 000000000..43b04e12c --- /dev/null +++ b/tests/api-server-stream.js @@ -0,0 +1,206 @@ +#!/usr/bin/env node +import http from 'node:http'; +import path from 'node:path'; +import readline from 'node:readline'; +import fsPromises from 'node:fs/promises'; +import { spawn, spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const cacheRoot = path.join(root, 'tests', '.cache', 'api-server-stream'); +const serverPath = path.join(root, 'tools', 'api-server.js'); + +await fsPromises.rm(cacheRoot, { recursive: true, force: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const build = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', fixtureRoot], + { env, stdio: 'inherit' } +); +if (build.status !== 0) { + 
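+  // Bail out before starting the server: the SSE assertions below need a freshly built index.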
console.error('api-server stream test failed: build_index failed'); + process.exit(build.status ?? 1); +} + +const server = spawn( + process.execPath, + [serverPath, '--port', '0', '--json', '--quiet', '--repo', fixtureRoot], + { env, stdio: ['ignore', 'pipe', 'pipe'] } +); + +let stderr = ''; +server.stderr?.on('data', (chunk) => { + stderr += chunk.toString(); +}); + +const readStartup = async () => { + const rl = readline.createInterface({ input: server.stdout }); + return await new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + rl.close(); + reject(new Error('api-server startup timed out')); + }, 10000); + rl.once('line', (line) => { + clearTimeout(timeout); + rl.close(); + resolve(line); + }); + }); +}; + +const parseSse = (block) => { + const lines = block.split(/\r?\n/); + let event = 'message'; + let data = ''; + for (const line of lines) { + if (line.startsWith('event:')) { + event = line.replace('event:', '').trim(); + continue; + } + if (line.startsWith('data:')) { + data += line.replace('data:', '').trim(); + } + } + const payload = data ? JSON.parse(data) : null; + return { event, data: payload }; +}; + +const readSse = async (method, requestPath, body) => await new Promise((resolve, reject) => { + const payload = body ? JSON.stringify(body) : null; + const events = []; + let buffer = ''; + const req = http.request( + { + host: serverInfo.host, + port: serverInfo.port, + path: requestPath, + method, + headers: payload + ? { + 'Content-Type': 'application/json', + 'Content-Length': Buffer.byteLength(payload) + } + : {} + }, + (res) => { + res.on('data', (chunk) => { + buffer += chunk.toString(); + while (true) { + const idx = buffer.indexOf('\n\n'); + if (idx === -1) break; + const block = buffer.slice(0, idx).trim(); + buffer = buffer.slice(idx + 2); + if (!block) continue; + const parsed = parseSse(block); + events.push(parsed); + if (parsed.event === 'done') { + resolve(events); + req.destroy(); + break; + } + } + }); + res.on('end', () => resolve(events)); + } + ); + req.on('error', reject); + if (payload) req.write(payload); + req.end(); +}); + +const abortStream = async (method, requestPath, body) => await new Promise((resolve, reject) => { + const payload = body ? JSON.stringify(body) : null; + const req = http.request( + { + host: serverInfo.host, + port: serverInfo.port, + path: requestPath, + method, + headers: payload + ? 
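+          // JSON bodies carry Content-Type and an explicit Content-Length so the server reads the full payload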
{ + 'Content-Type': 'application/json', + 'Content-Length': Buffer.byteLength(payload) + } + : {} + }, + (res) => { + const timeout = setTimeout(() => { + req.destroy(); + resolve(); + }, 1000); + res.once('data', () => { + clearTimeout(timeout); + req.destroy(); + resolve(); + }); + res.on('error', (err) => { + clearTimeout(timeout); + if (err?.code === 'ECONNRESET') return resolve(); + reject(err); + }); + } + ); + req.on('error', (err) => { + if (err?.code === 'ECONNRESET') return resolve(); + reject(err); + }); + if (payload) req.write(payload); + req.end(); +}); + +let serverInfo = null; +try { + const line = await readStartup(); + serverInfo = JSON.parse(line || '{}'); + if (!serverInfo?.port) { + throw new Error('api-server did not report a listening port'); + } + + const statusEvents = await readSse('GET', '/status/stream'); + const statusResult = statusEvents.find((evt) => evt.event === 'result'); + if (!statusResult?.data?.status?.repo?.root) { + throw new Error('status stream missing repo payload'); + } + + const searchEvents = await readSse('POST', '/search/stream', { query: 'return', mode: 'code' }); + const searchResult = searchEvents.find((evt) => evt.event === 'result'); + const hits = searchResult?.data?.result?.code || []; + if (!hits.length) { + throw new Error('search stream returned no results'); + } + + await abortStream('POST', '/search/stream', { query: 'return', mode: 'code' }); + const followUp = await readSse('GET', '/status/stream'); + const followResult = followUp.find((evt) => evt.event === 'result'); + if (!followResult?.data?.status?.repo?.root) { + throw new Error('stream abort should not break subsequent requests'); + } +} catch (err) { + console.error(err?.message || err); + if (stderr.trim()) { + console.error(stderr.trim()); + } + server.kill('SIGKILL'); + process.exit(1); +} + +await new Promise((resolve) => { + const timeout = setTimeout(() => { + server.kill('SIGKILL'); + resolve(); + }, 5000); + server.once('exit', () => { + clearTimeout(timeout); + resolve(); + }); + server.kill('SIGTERM'); +}); + +console.log('api-server stream tests passed'); diff --git a/tests/api-server.js b/tests/api-server.js new file mode 100644 index 000000000..0f7b038c8 --- /dev/null +++ b/tests/api-server.js @@ -0,0 +1,164 @@ +#!/usr/bin/env node +import http from 'node:http'; +import path from 'node:path'; +import readline from 'node:readline'; +import fsPromises from 'node:fs/promises'; +import { spawn, spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const cacheRoot = path.join(root, 'tests', '.cache', 'api-server'); +const emptyRepo = path.join(cacheRoot, 'empty'); +const serverPath = path.join(root, 'tools', 'api-server.js'); + +await fsPromises.rm(cacheRoot, { recursive: true, force: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); +await fsPromises.mkdir(emptyRepo, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const build = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', fixtureRoot], + { env, stdio: 'inherit' } +); +if (build.status !== 0) { + console.error('api-server test failed: build_index failed'); + process.exit(build.status ?? 
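+  // spawnSync reports status null when the child dies on a signal; coalesce that to a failing exit code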
1); +} + +const server = spawn( + process.execPath, + [serverPath, '--port', '0', '--json', '--quiet', '--repo', fixtureRoot], + { env, stdio: ['ignore', 'pipe', 'pipe'] } +); + +let stderr = ''; +server.stderr?.on('data', (chunk) => { + stderr += chunk.toString(); +}); + +const readStartup = async () => { + const rl = readline.createInterface({ input: server.stdout }); + return await new Promise((resolve, reject) => { + const timeout = setTimeout(() => { + rl.close(); + reject(new Error('api-server startup timed out')); + }, 10000); + rl.once('line', (line) => { + clearTimeout(timeout); + rl.close(); + resolve(line); + }); + }); +}; + +const requestJson = async (method, requestPath, body) => await new Promise((resolve, reject) => { + const host = serverInfo?.host || '127.0.0.1'; + const port = serverInfo?.port || 0; + const payload = body ? JSON.stringify(body) : null; + const req = http.request( + { + host, + port, + path: requestPath, + method, + headers: payload + ? { + 'Content-Type': 'application/json', + 'Content-Length': Buffer.byteLength(payload) + } + : {} + }, + (res) => { + let data = ''; + res.on('data', (chunk) => { + data += chunk.toString(); + }); + res.on('end', () => { + try { + resolve({ status: res.statusCode || 0, body: JSON.parse(data || '{}') }); + } catch (err) { + reject(err); + } + }); + } + ); + req.on('error', reject); + if (payload) req.write(payload); + req.end(); +}); + +let serverInfo = null; +try { + const line = await readStartup(); + serverInfo = JSON.parse(line || '{}'); + if (!serverInfo?.port) { + throw new Error('api-server did not report a listening port'); + } + + const health = await requestJson('GET', '/health'); + if (!health.body?.ok || typeof health.body.uptimeMs !== 'number') { + throw new Error('api-server /health response invalid'); + } + + const status = await requestJson('GET', '/status'); + if (!status.body?.ok || !status.body.status?.repo?.root) { + throw new Error('api-server /status response missing repo info'); + } + + const search = await requestJson('POST', '/search', { query: 'return', mode: 'code', top: 3 }); + const hits = search.body?.result?.code || []; + if (!search.body?.ok || !hits.length) { + throw new Error('api-server /search returned no results'); + } + if (hits[0]?.tokens !== undefined) { + throw new Error('api-server /search should default to compact JSON output'); + } + + const invalid = await requestJson('POST', '/search', {}); + if (invalid.status !== 400 || invalid.body?.ok !== false || invalid.body?.code !== 'INVALID_REQUEST') { + throw new Error('api-server should reject missing query'); + } + + const unknownField = await requestJson('POST', '/search', { + query: 'return', + extraField: true + }); + if (unknownField.status !== 400 || unknownField.body?.code !== 'INVALID_REQUEST') { + throw new Error('api-server should reject unknown fields'); + } + + const noIndex = await requestJson('POST', '/search', { + repoPath: emptyRepo, + query: 'return' + }); + if (noIndex.status !== 409 || noIndex.body?.code !== 'NO_INDEX') { + throw new Error('api-server should return NO_INDEX when indexes are missing'); + } +} catch (err) { + console.error(err?.message || err); + if (stderr.trim()) { + console.error(stderr.trim()); + } + server.kill('SIGKILL'); + process.exit(1); +} + +await new Promise((resolve) => { + const timeout = setTimeout(() => { + server.kill('SIGKILL'); + resolve(); + }, 5000); + server.once('exit', () => { + clearTimeout(timeout); + resolve(); + }); + server.kill('SIGTERM'); +}); + +console.log('api-server 
tests passed'); diff --git a/tests/artifact-bak-recovery.js b/tests/artifact-bak-recovery.js new file mode 100644 index 000000000..c2cdb26d4 --- /dev/null +++ b/tests/artifact-bak-recovery.js @@ -0,0 +1,85 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { readJsonFile, readJsonLinesArraySync } from '../src/shared/artifact-io.js'; +import { writeJsonLinesFile } from '../src/shared/json-stream.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'artifact-bak-recovery'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); + +const primaryPath = path.join(tempRoot, 'primary.json'); +const primaryBak = `${primaryPath}.bak`; +await fsPromises.writeFile(primaryPath, JSON.stringify({ ok: true })); +await fsPromises.writeFile(primaryBak, JSON.stringify({ ok: false })); + +const primary = readJsonFile(primaryPath); +if (!primary?.ok) { + console.error('artifact bak test failed: primary read did not return expected payload.'); + process.exit(1); +} +if (fs.existsSync(primaryBak)) { + console.error('artifact bak test failed: backup was not cleaned up after primary read.'); + process.exit(1); +} + +const corruptPath = path.join(tempRoot, 'corrupt.json'); +const corruptBak = `${corruptPath}.bak`; +await fsPromises.writeFile(corruptPath, '{bad json'); +await fsPromises.writeFile(corruptBak, JSON.stringify({ ok: 'backup' })); + +const fallback = readJsonFile(corruptPath); +if (fallback?.ok !== 'backup') { + console.error('artifact bak test failed: fallback did not return backup payload.'); + process.exit(1); +} +if (!fs.existsSync(corruptBak)) { + console.error('artifact bak test failed: backup should remain after fallback read.'); + process.exit(1); +} + +const missingPath = path.join(tempRoot, 'missing.json'); +const missingBak = `${missingPath}.bak`; +await fsPromises.writeFile(missingBak, JSON.stringify({ ok: 'onlybak' })); +const missing = readJsonFile(missingPath); +if (missing?.ok !== 'onlybak') { + console.error('artifact bak test failed: missing primary did not fall back to backup.'); + process.exit(1); +} +if (!fs.existsSync(missingBak)) { + console.error('artifact bak test failed: backup should remain when primary is missing.'); + process.exit(1); +} + +const jsonlPath = path.join(tempRoot, 'lines.jsonl'); +const jsonlBak = `${jsonlPath}.bak`; +await writeJsonLinesFile(jsonlPath, [{ id: 1 }, { id: 2 }], { atomic: false }); +await fsPromises.writeFile(jsonlBak, '{"id":3}\n'); +const jsonl = readJsonLinesArraySync(jsonlPath); +if (jsonl.length !== 2) { + console.error('artifact bak test failed: jsonl primary read did not return expected rows.'); + process.exit(1); +} +if (fs.existsSync(jsonlBak)) { + console.error('artifact bak test failed: jsonl backup was not cleaned up after primary read.'); + process.exit(1); +} + +const jsonlCorruptPath = path.join(tempRoot, 'lines-corrupt.jsonl'); +const jsonlCorruptBak = `${jsonlCorruptPath}.bak`; +await fsPromises.writeFile(jsonlCorruptPath, '{bad\n'); +await fsPromises.writeFile(jsonlCorruptBak, '{"id":4}\n{"id":5}\n'); +const jsonlFallback = readJsonLinesArraySync(jsonlCorruptPath); +if (jsonlFallback.length !== 2) { + console.error('artifact bak test failed: jsonl backup fallback did not return expected rows.'); + process.exit(1); +} +if (!fs.existsSync(jsonlCorruptBak)) { + console.error('artifact bak test failed: jsonl backup should remain after fallback 
read.'); + process.exit(1); +} + +console.log('artifact bak recovery tests passed'); diff --git a/tests/artifact-formats.js b/tests/artifact-formats.js new file mode 100644 index 000000000..b9bf9a416 --- /dev/null +++ b/tests/artifact-formats.js @@ -0,0 +1,78 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { loadIndex } from '../src/retrieval/cli-index.js'; + +const root = process.cwd(); +const cacheRoot = path.join(root, 'tests', '.cache', 'artifact-formats'); + +await fs.rm(cacheRoot, { recursive: true, force: true }); +await fs.mkdir(cacheRoot, { recursive: true }); + +const chunkMetaLines = [ + { id: 0, file: 'src/a.js', start: 0, end: 10, ext: '.js', kind: 'Function', name: 'alpha' }, + { id: 1, file: 'src/b.js', start: 0, end: 20, ext: '.js', kind: 'Function', name: 'beta' } +]; +await fs.writeFile( + path.join(cacheRoot, 'chunk_meta.jsonl'), + `${chunkMetaLines.map((row) => JSON.stringify(row)).join('\n')}\n` +); +await fs.writeFile( + path.join(cacheRoot, 'chunk_meta.json'), + JSON.stringify([{ id: 99, file: 'src/legacy.js', start: 0, end: 1, ext: '.js' }], null, 2) +); + +const shardsDir = path.join(cacheRoot, 'token_postings.shards'); +await fs.mkdir(shardsDir, { recursive: true }); + +const partA = { + vocab: ['alpha'], + postings: [[[0, 1]]] +}; +const partB = { + vocab: ['beta'], + postings: [[[1, 2]]] +}; + +const partAName = 'token_postings.part-00000.json'; +const partBName = 'token_postings.part-00001.json'; +await fs.writeFile(path.join(shardsDir, partAName), JSON.stringify(partA, null, 2)); +await fs.writeFile(path.join(shardsDir, partBName), JSON.stringify(partB, null, 2)); + +const meta = { + avgDocLen: 1.5, + totalDocs: 2, + format: 'sharded', + shardSize: 1, + vocabCount: 2, + parts: [ + path.join('token_postings.shards', partAName), + path.join('token_postings.shards', partBName) + ], + docLengths: [1, 2] +}; +await fs.writeFile( + path.join(cacheRoot, 'token_postings.meta.json'), + JSON.stringify(meta, null, 2) +); +await fs.writeFile( + path.join(cacheRoot, 'token_postings.json'), + JSON.stringify({ vocab: ['legacy'], postings: [[[0, 1]]], docLengths: [1], avgDocLen: 1, totalDocs: 1 }, null, 2) +); + +const idx = loadIndex(cacheRoot, { modelIdDefault: null, fileChargramN: 3 }); + +if (!idx || !Array.isArray(idx.chunkMeta) || idx.chunkMeta.length !== 2) { + console.error('Expected chunk_meta to load from JSONL.'); + process.exit(1); +} +if (!idx.tokenIndex || idx.tokenIndex.vocab?.length !== 2) { + console.error('Expected token_postings shards to load into tokenIndex.'); + process.exit(1); +} +if (!Array.isArray(idx.tokenIndex.docLengths) || idx.tokenIndex.docLengths.length !== 2) { + console.error('Expected docLengths to load from token_postings meta.'); + process.exit(1); +} + +console.log('artifact formats test passed'); diff --git a/tests/artifact-size-guardrails.js b/tests/artifact-size-guardrails.js new file mode 100644 index 000000000..179db2e73 --- /dev/null +++ b/tests/artifact-size-guardrails.js @@ -0,0 +1,118 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const cacheRoot = path.join(root, 'tests', '.cache', 'artifact-size-guardrails'); +const repoRoot = path.join(cacheRoot, 'repo'); + +await fsPromises.rm(cacheRoot, { recursive: true, force: true }); +await 
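+// rebuild the fixture repo from a clean slate so both guardrail runs see identical inputs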
fsPromises.mkdir(repoRoot, { recursive: true }); + +const tokens = []; +for (let i = 0; i < 200; i += 1) { + tokens.push(`token_${i}_${'x'.repeat(24)}`); +} +const lines = []; +for (let i = 0; i < tokens.length; i += 20) { + lines.push(tokens.slice(i, i + 20).join(' ')); +} +const content = `${lines.join('\n')}\n`; +for (let i = 0; i < 3; i += 1) { + await fsPromises.writeFile(path.join(repoRoot, `big-${i}.js`), content); +} +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify({ + sqlite: { + use: false + }, + indexing: { + fileScan: { + minified: { sampleMinBytes: 20000 } + }, + chunkTokenMode: 'full', + artifacts: { + chunkMetaFormat: 'json', + chunkMetaShardSize: 0, + tokenPostingsFormat: 'json' + } + } + }, null, 2) +); + +const baseEnv = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const runBuild = (label, envOverrides) => { + const result = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--mode', 'code', '--repo', repoRoot], + { cwd: repoRoot, env: { ...baseEnv, ...envOverrides }, stdio: 'inherit' } + ); + if (result.status !== 0) { + console.error(`Failed: build_index (${label})`); + process.exit(result.status ?? 1); + } +}; + +runBuild('artifact guardrails (small max)', { PAIROFCLEATS_MAX_JSON_BYTES: '2048' }); + +const userConfig = loadUserConfig(repoRoot); +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +const indexDir = getIndexDir(repoRoot, 'code', userConfig); + +const chunkMetaMetaPath = path.join(indexDir, 'chunk_meta.meta.json'); +const chunkMetaPartsDir = path.join(indexDir, 'chunk_meta.parts'); +if (!fs.existsSync(chunkMetaMetaPath) || !fs.existsSync(chunkMetaPartsDir)) { + console.error('Expected chunk_meta sharding when max JSON bytes is small.'); + process.exit(1); +} +if (fs.existsSync(path.join(indexDir, 'chunk_meta.json'))) { + console.error('Expected chunk_meta.json to be suppressed when sharding.'); + process.exit(1); +} + +const tokenMetaPath = path.join(indexDir, 'token_postings.meta.json'); +const tokenShardsDir = path.join(indexDir, 'token_postings.shards'); +if (!fs.existsSync(tokenMetaPath) || !fs.existsSync(tokenShardsDir)) { + console.error('Expected token_postings shards when max JSON bytes is small.'); + process.exit(1); +} +if (fs.existsSync(path.join(indexDir, 'token_postings.json'))) { + console.error('Expected token_postings.json to be suppressed when sharding.'); + process.exit(1); +} + +runBuild('artifact guardrails (large max)', { PAIROFCLEATS_MAX_JSON_BYTES: '52428800' }); + +const nextIndexDir = getIndexDir(repoRoot, 'code', userConfig); +const nextChunkMetaMeta = path.join(nextIndexDir, 'chunk_meta.meta.json'); +const nextChunkMetaParts = path.join(nextIndexDir, 'chunk_meta.parts'); +if (fs.existsSync(nextChunkMetaMeta) || fs.existsSync(nextChunkMetaParts)) { + console.error('Expected chunk_meta to remain unsharded when max JSON bytes is large.'); + process.exit(1); +} +if (!fs.existsSync(path.join(nextIndexDir, 'chunk_meta.json'))) { + console.error('Expected chunk_meta.json when max JSON bytes is large.'); + process.exit(1); +} + +const nextTokenMetaPath = path.join(nextIndexDir, 'token_postings.meta.json'); +const nextTokenShardsDir = path.join(nextIndexDir, 'token_postings.shards'); +if (fs.existsSync(nextTokenMetaPath) || fs.existsSync(nextTokenShardsDir)) { + console.error('Expected token_postings to remain unsharded when max JSON bytes is large.'); + process.exit(1); +} +if 
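+// with a large byte budget the monolithic token_postings.json must be written instead of shards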
(!fs.existsSync(path.join(nextIndexDir, 'token_postings.json'))) { + console.error('Expected token_postings.json when max JSON bytes is large.'); + process.exit(1); +} + +console.log('artifact size guardrails test passed'); diff --git a/tests/artifacts/file-meta.test.js b/tests/artifacts/file-meta.test.js new file mode 100644 index 000000000..e23bad955 --- /dev/null +++ b/tests/artifacts/file-meta.test.js @@ -0,0 +1,31 @@ +#!/usr/bin/env node +import { buildFileMeta } from '../../src/index/build/artifacts/file-meta.js'; + +const fail = (message) => { + console.error(message); + process.exit(1); +}; + +const state = { + chunks: [ + { file: 'a.js', ext: '.js' }, + { file: 'a.js', ext: '.js' }, + { file: 'b.js', ext: '.js' } + ] +}; + +const { fileMeta, fileIdByPath } = buildFileMeta(state); +if (fileMeta.length !== 2) { + fail('Expected fileMeta to contain one entry per file.'); +} +if (fileMeta[0].file !== 'a.js' || fileMeta[0].id !== 0) { + fail('Expected a.js to be assigned id 0.'); +} +if (fileMeta[1].file !== 'b.js' || fileMeta[1].id !== 1) { + fail('Expected b.js to be assigned id 1.'); +} +if (fileIdByPath.get('a.js') !== 0 || fileIdByPath.get('b.js') !== 1) { + fail('Expected fileIdByPath to map files to stable ids.'); +} + +console.log('artifact file meta tests passed'); diff --git a/tests/artifacts/token-mode.test.js b/tests/artifacts/token-mode.test.js new file mode 100644 index 000000000..b242a5f34 --- /dev/null +++ b/tests/artifacts/token-mode.test.js @@ -0,0 +1,40 @@ +#!/usr/bin/env node +import { resolveTokenMode } from '../../src/index/build/artifacts/token-mode.js'; + +const fail = (message) => { + console.error(message); + process.exit(1); +}; + +const baseState = { + chunks: [{ tokens: ['a', 'b', 'c'] }] +}; + +const autoSample = resolveTokenMode({ + indexingConfig: {}, + state: baseState, + fileCounts: { candidates: 6000 } +}); +if (autoSample.resolvedTokenMode !== 'sample') { + fail('Expected auto mode to resolve to sample when file count exceeds max.'); +} + +const tokenBudgetSample = resolveTokenMode({ + indexingConfig: { chunkTokenMaxTokens: 1 }, + state: { chunks: [{ tokens: ['a', 'b'] }] }, + fileCounts: { candidates: 1 } +}); +if (tokenBudgetSample.resolvedTokenMode !== 'sample') { + fail('Expected auto mode to resolve to sample when token budget exceeded.'); +} + +const noneMode = resolveTokenMode({ + indexingConfig: { chunkTokenMode: 'none' }, + state: baseState, + fileCounts: { candidates: 0 } +}); +if (noneMode.resolvedTokenMode !== 'none') { + fail('Expected explicit chunkTokenMode=none to be respected.'); +} + +console.log('artifact token mode tests passed'); diff --git a/tests/backend-policy.js b/tests/backend-policy.js new file mode 100644 index 000000000..cb817b6a9 --- /dev/null +++ b/tests/backend-policy.js @@ -0,0 +1,83 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { resolveBackendPolicy } from '../src/storage/backend-policy.js'; + +const autoDefault = resolveBackendPolicy({ + backendArg: 'auto', + sqliteScoreModeConfig: false, + sqliteConfigured: true, + sqliteAvailable: true, + lmdbAvailable: true, + needsSqlite: true +}); +assert.equal(autoDefault.useSqlite, true); +assert.equal(autoDefault.useLmdb, false); +assert.equal(autoDefault.backendLabel, 'sqlite'); + +const autoChunkThreshold = resolveBackendPolicy({ + backendArg: 'auto', + sqliteConfigured: true, + sqliteAvailable: true, + sqliteAutoChunkThreshold: 10, + needsSqlite: true, + chunkCounts: [5] +}); +assert.equal(autoChunkThreshold.useSqlite, false); 
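+// Illustrative summary of the auto policy, as exercised by these assertions:
+//   chunk count below sqliteAutoChunkThreshold   -> skip sqlite (and lmdb)
+//   artifact bytes above sqliteAutoArtifactBytes -> keep sqlite
+//   sqlite unavailable but lmdb present          -> fall back to lmdb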
+assert.equal(autoChunkThreshold.useLmdb, false); + +const autoArtifactThreshold = resolveBackendPolicy({ + backendArg: 'auto', + sqliteConfigured: true, + sqliteAvailable: true, + sqliteAutoArtifactBytes: 100, + needsSqlite: true, + artifactBytes: [200] +}); +assert.equal(autoArtifactThreshold.useSqlite, true); + +const forcedMemory = resolveBackendPolicy({ + backendArg: 'memory', + sqliteConfigured: true, + sqliteAvailable: true, + lmdbAvailable: true, + needsSqlite: true +}); +assert.equal(forcedMemory.useSqlite, false); +assert.equal(forcedMemory.useLmdb, false); +assert.equal(forcedMemory.backendLabel, 'memory'); + +const forcedSqliteMissing = resolveBackendPolicy({ + backendArg: 'sqlite', + sqliteConfigured: true, + sqliteAvailable: false, + lmdbAvailable: true, + needsSqlite: true +}); +assert.ok(forcedSqliteMissing.error); + +const forcedLmdb = resolveBackendPolicy({ + backendArg: 'lmdb', + lmdbAvailable: true, + needsSqlite: true +}); +assert.equal(forcedLmdb.useLmdb, true); +assert.equal(forcedLmdb.backendLabel, 'lmdb'); + +const forcedLmdbMissing = resolveBackendPolicy({ + backendArg: 'lmdb', + lmdbAvailable: false, + needsSqlite: true +}); +assert.ok(forcedLmdbMissing.error); + +const autoFallbackLmdb = resolveBackendPolicy({ + backendArg: 'auto', + sqliteConfigured: true, + sqliteAvailable: false, + lmdbAvailable: true, + needsSqlite: true +}); +assert.equal(autoFallbackLmdb.useLmdb, true); +assert.equal(autoFallbackLmdb.backendLabel, 'lmdb'); + +console.log('backend-policy test passed'); diff --git a/tests/bench-language-lock-semantics.js b/tests/bench-language-lock-semantics.js new file mode 100644 index 000000000..4abc50b5d --- /dev/null +++ b/tests/bench-language-lock-semantics.js @@ -0,0 +1,45 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { checkIndexLock } from '../tools/bench/language/locks.js'; + +const root = path.join(process.cwd(), 'tests', '.cache', 'bench-language-lock-semantics'); +const locksDir = path.join(root, 'locks'); +const lockPath = path.join(locksDir, 'index.lock'); + +await fsPromises.rm(root, { recursive: true, force: true }); +await fsPromises.mkdir(locksDir, { recursive: true }); + +const staleStartedAt = new Date(Date.now() - 5000).toISOString(); +await fsPromises.writeFile(lockPath, JSON.stringify({ pid: process.pid, startedAt: staleStartedAt })); + +const staleResult = await checkIndexLock({ + repoCacheRoot: root, + repoLabel: 'repo', + lockMode: 'fail-fast', + lockWaitMs: 0, + lockStaleMs: 1000, + onLog: () => {} +}); +assert.equal(staleResult.ok, true, 'expected stale lock to be cleared'); +assert.equal(staleResult.cleared, true, 'expected stale lock to report cleared'); +const staleExists = await fsPromises.stat(lockPath).then(() => true).catch(() => false); +assert.equal(staleExists, false, 'expected stale lock file to be removed'); + +await fsPromises.mkdir(locksDir, { recursive: true }); +await fsPromises.writeFile(lockPath, JSON.stringify({ pid: process.pid, startedAt: new Date().toISOString() })); + +const activeResult = await checkIndexLock({ + repoCacheRoot: root, + repoLabel: 'repo', + lockMode: 'fail-fast', + lockWaitMs: 0, + lockStaleMs: 60 * 60 * 1000, + onLog: () => {} +}); +assert.equal(activeResult.ok, false, 'expected active lock to block'); +assert.equal(activeResult.detail.pid, process.pid, 'expected active lock pid to be reported'); +assert.equal(activeResult.detail.alive, true, 'expected active lock pid to be alive'); + 
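+// Recap: locks older than lockStaleMs are cleared and reported; a fresh lock held by a live pid blocks fail-fast runs.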
+console.log('bench-language lock semantics test passed'); diff --git a/tests/bench-language-lock.js b/tests/bench-language-lock.js new file mode 100644 index 000000000..b811d639f --- /dev/null +++ b/tests/bench-language-lock.js @@ -0,0 +1,82 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getRepoCacheRoot } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'bench-language-lock'); +const reposRoot = path.join(tempRoot, 'repos'); +const cacheRoot = path.join(tempRoot, 'cache'); +const resultsRoot = path.join(tempRoot, 'results'); +const configPath = path.join(tempRoot, 'repos.json'); +const queriesPath = path.join(root, 'tests', 'fixtures', 'sample', 'queries.txt'); +const repoId = 'test/lock-repo'; +const repoPath = path.join(reposRoot, 'javascript', repoId.replace('/', '__')); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoPath, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); +await fsPromises.mkdir(resultsRoot, { recursive: true }); + +await fsPromises.writeFile(path.join(repoPath, 'README.md'), 'bench lock test'); + +const config = { + javascript: { + label: 'JavaScript', + queries: queriesPath, + repos: { + typical: [repoId] + } + } +}; +await fsPromises.writeFile(configPath, JSON.stringify(config, null, 2)); + +const repoCacheRoot = getRepoCacheRoot(repoPath, { cache: { root: cacheRoot } }); +const lockDir = path.join(repoCacheRoot, 'locks'); +await fsPromises.mkdir(lockDir, { recursive: true }); +await fsPromises.writeFile( + path.join(lockDir, 'index.lock'), + JSON.stringify({ pid: process.pid, startedAt: new Date().toISOString() }) +); + +const scriptPath = path.join(root, 'tools', 'bench-language-repos.js'); +const result = spawnSync( + process.execPath, + [ + scriptPath, + '--config', + configPath, + '--root', + reposRoot, + '--cache-root', + cacheRoot, + '--results', + resultsRoot, + '--no-clone', + '--dry-run', + '--lock-mode', + 'fail-fast', + '--json' + ], + { encoding: 'utf8' } +); + +if (result.status !== 0) { + console.error(result.stderr || 'bench-language-lock test failed'); + process.exit(result.status ?? 1); +} + +const payload = JSON.parse(result.stdout || '{}'); +const task = Array.isArray(payload.tasks) ? 
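+  // the dry run lists exactly one task for the single configured repo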
payload.tasks[0] : null; +if (!task || !task.skipped) { + console.error('Expected bench task to be skipped due to lock.'); + process.exit(1); +} +if (task.skipReason !== 'lock') { + console.error(`Expected skipReason=lock, got ${task.skipReason}`); + process.exit(1); +} + +console.log('bench-language lock test passed'); diff --git a/tests/bench-language-progress-parse.js b/tests/bench-language-progress-parse.js new file mode 100644 index 000000000..4139ecca4 --- /dev/null +++ b/tests/bench-language-progress-parse.js @@ -0,0 +1,50 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { + parseFileProgressLine, + parseImportStatsLine, + parseLineProgress, + parseProgressLine, + parseScanMode, + parseShardLine +} from '../tools/bench/language/progress/parse.js'; + +const shard = parseShardLine('-> Shard 2/5: python (42 files)'); +assert.deepEqual(shard, { + index: 2, + total: 5, + shardLabel: 'python', + fileCount: 42 +}); + +const importStats = parseImportStatsLine('\u2192 Imports: modules=12, edges=34, files=56'); +assert.deepEqual(importStats, { modules: 12, edges: 34, files: 56 }); + +const combined = parseFileProgressLine('Files 10/100 (10.0%) [shard 2/5] File 3/10 lines 1,234 src/index.js'); +assert.equal(combined.count, 10); +assert.equal(combined.total, 100); +assert.equal(combined.pct, 10); +assert.equal(combined.shardLabel, '2/5'); +assert.equal(combined.fileIndex, 3); +assert.equal(combined.fileTotal, 10); +assert.equal(combined.file, 'src/index.js'); + +const fileOnly = parseFileProgressLine('File 7/12 src/lib.rs'); +assert.equal(fileOnly.count, null); +assert.equal(fileOnly.total, null); +assert.equal(fileOnly.pct, null); +assert.equal(fileOnly.shardLabel, ''); +assert.equal(fileOnly.fileIndex, 7); +assert.equal(fileOnly.fileTotal, 12); +assert.equal(fileOnly.file, 'src/lib.rs'); + +const progress = parseProgressLine('Files 90/200 (45.0%)'); +assert.deepEqual(progress, { step: 'Files', count: 90, total: 200, pct: 45 }); + +const lineProgress = parseLineProgress('Line 5 / 20'); +assert.deepEqual(lineProgress, { current: 5, total: 20 }); + +assert.equal(parseScanMode('Scanning code'), 'code'); +assert.equal(parseScanMode('Scanning prose'), 'prose'); + +console.log('bench-language progress parse test passed'); diff --git a/tests/bench-language-repos.js b/tests/bench-language-repos.js new file mode 100644 index 000000000..7f1733450 --- /dev/null +++ b/tests/bench-language-repos.js @@ -0,0 +1,38 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { formatShardFileProgress } from '../src/shared/bench-progress.js'; + +const root = process.cwd(); +const scriptPath = path.join(root, 'tools', 'bench-language-repos.js'); +const result = spawnSync(process.execPath, [scriptPath, '--list', '--json'], { encoding: 'utf8' }); +if (result.status !== 0) { + console.error(result.stderr || 'bench-language-repos failed'); + process.exit(result.status ?? 
1); +} + +const payload = JSON.parse(result.stdout || '{}'); +assert.ok(Array.isArray(payload.languages), 'languages array missing'); +assert.ok(payload.languages.includes('javascript'), 'javascript language missing'); +assert.ok(payload.languages.includes('shell'), 'shell language missing'); +assert.ok(Array.isArray(payload.tasks), 'tasks array missing'); +assert.ok(payload.tasks.length > 0, 'no benchmark tasks listed'); + +const shardByLabel = new Map([['src', { index: 2, total: 4 }]]); +const progressLine = formatShardFileProgress( + { + fileIndex: 175, + fileTotal: 176, + pct: 99.4, + shardLabel: 'src', + file: 'src/app.js' + }, + { shardByLabel, lineTotal: 123 } +); +assert.ok(progressLine.startsWith('[shard 2/4] 175/176 (99.4%)'), 'shard prefix missing'); +assert.ok(progressLine.includes('lines 123'), 'line count missing'); +assert.ok(progressLine.includes('src/app.js'), 'file path missing'); +assert.ok(!progressLine.includes('Files '), 'legacy Files label should be removed'); + +console.log('bench-language-repos test passed.'); diff --git a/tests/bench-progress-format.js b/tests/bench-progress-format.js new file mode 100644 index 000000000..9894ea7ef --- /dev/null +++ b/tests/bench-progress-format.js @@ -0,0 +1,30 @@ +#!/usr/bin/env node +import { formatShardFileProgress } from '../src/shared/bench-progress.js'; + +const shardByLabel = new Map([['alpha', { index: 2, total: 10 }]]); +const output = formatShardFileProgress({ + shardLabel: 'alpha', + fileIndex: 5, + fileTotal: 20, + pct: 25.0, + file: 'src/app.js' +}, { shardByLabel, lineTotal: 100 }); + +if (!output.includes('[shard 2/10]')) { + console.error('bench progress format test failed: missing shard index'); + process.exit(1); +} +if (!output.includes('5/20')) { + console.error('bench progress format test failed: missing file counts'); + process.exit(1); +} +if (!output.includes('lines 100')) { + console.error('bench progress format test failed: missing line count'); + process.exit(1); +} +if (!output.includes('src/app.js')) { + console.error('bench progress format test failed: missing file path'); + process.exit(1); +} + +console.log('bench progress format test passed'); diff --git a/tests/bench.js b/tests/bench.js index 11d500320..f9fc55836 100644 --- a/tests/bench.js +++ b/tests/bench.js @@ -1,21 +1,53 @@ #!/usr/bin/env node +import fsSync from 'node:fs'; import fs from 'node:fs/promises'; import path from 'node:path'; -import { spawnSync } from 'node:child_process'; -import minimist from 'minimist'; - -const argv = minimist(process.argv.slice(2), { - boolean: ['ann', 'no-ann', 'json', 'write-report', 'build', 'build-index', 'build-sqlite', 'incremental', 'stub-embeddings'], - string: ['queries', 'backend', 'out', 'bm25-k1', 'bm25-b', 'fts-profile', 'fts-weights'], - alias: { n: 'top', q: 'queries' }, - default: { top: 5, limit: 0, json: false, 'write-report': false } +import { spawn, spawnSync } from 'node:child_process'; +import { createCli } from '../src/shared/cli.js'; +import { BENCH_OPTIONS, validateBenchArgs } from '../src/shared/cli-options.js'; +import { getIndexDir, getRuntimeConfig, loadUserConfig, resolveRuntimeEnv, resolveSqlitePaths } from '../tools/dict-utils.js'; +import { getEnvConfig } from '../src/shared/env.js'; +import { runWithConcurrency } from '../src/shared/concurrency.js'; +import os from 'node:os'; +import { createSafeRegex, normalizeSafeRegexConfig } from '../src/shared/safe-regex.js'; +import { build as buildHistogram } from 'hdr-histogram-js'; + +const rawArgs = process.argv.slice(2); +const 
argv = createCli({ + scriptName: 'bench', + options: BENCH_OPTIONS, + aliases: { n: 'top', q: 'queries' } +}).parse(); +validateBenchArgs(argv); + +const safeRegexConfig = normalizeSafeRegexConfig({ + maxPatternLength: 64, + maxInputLength: 64, + timeoutMs: 10, + flags: 'i' }); +const safeRegex = createSafeRegex('a+b', '', safeRegexConfig); +if (!safeRegex || !safeRegex.test('Aaab')) { + console.error('Safe regex self-check failed.'); + process.exit(1); +} +const rejected = createSafeRegex('a'.repeat(128), '', safeRegexConfig); +if (rejected) { + console.error('Safe regex maxPatternLength guard failed.'); + process.exit(1); +} +if (safeRegex.test('a'.repeat(100))) { + console.error('Safe regex maxInputLength guard failed.'); + process.exit(1); +} const root = process.cwd(); +const repoArg = argv.repo ? path.resolve(argv.repo) : null; const searchPath = path.join(root, 'search.js'); const reportPath = path.join(root, 'tools', 'report-artifacts.js'); const buildIndexPath = path.join(root, 'build_index.js'); const buildSqlitePath = path.join(root, 'tools', 'build-sqlite-index.js'); +const indexerServicePath = path.join(root, 'tools', 'indexer-service.js'); const defaultQueriesPath = path.join(root, 'tests', 'parity-queries.txt'); const queriesPath = argv.queries ? path.resolve(argv.queries) : defaultQueriesPath; @@ -41,8 +73,10 @@ if (!queries.length) { const topN = Math.max(1, parseInt(argv.top, 10) || 5); const limit = Math.max(0, parseInt(argv.limit, 10) || 0); const selectedQueries = limit > 0 ? queries.slice(0, limit) : queries; -const annEnabled = argv.ann !== false; +const annFlagPresent = rawArgs.includes('--ann') || rawArgs.includes('--no-ann'); +const annEnabled = annFlagPresent ? argv.ann === true : true; const annArg = annEnabled ? '--ann' : '--no-ann'; +const jsonOutput = argv.json === true; const bm25K1Arg = argv['bm25-k1']; const bm25BArg = argv['bm25-b']; const ftsProfileArg = argv['fts-profile']; @@ -56,16 +90,128 @@ function resolveBackends(value) { return Array.from(new Set(list.map((entry) => entry.trim()).filter(Boolean))); } const backends = resolveBackends(argv.backend); -const buildIndex = argv['build-index'] || argv.build; -const buildSqlite = argv['build-sqlite'] || argv.build; -const buildIncremental = argv.incremental === true; -const stubEmbeddings = argv['stub-embeddings'] === true; +let buildIndex = argv['build-index'] || argv.build; +let buildSqlite = argv['build-sqlite'] || argv.build; +const buildIncremental = argv.incremental === true || buildSqlite; +const envConfig = getEnvConfig(); +const indexProfileArg = typeof argv['index-profile'] === 'string' + ? argv['index-profile'].trim() + : ''; +const noIndexProfile = rawArgs.includes('--no-index-profile'); +const originalEnvProfile = process.env.PAIROFCLEATS_PROFILE; +const indexProfileRaw = indexProfileArg; +const suppressEnvProfile = noIndexProfile && !indexProfileRaw; +if (suppressEnvProfile) { + delete process.env.PAIROFCLEATS_PROFILE; +} +const runtimeRoot = repoArg || root; +const userConfig = loadUserConfig( + runtimeRoot, + indexProfileRaw ? 
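+  // pass the explicit --index-profile through to config loading when provided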
{ profile: indexProfileRaw } : {} +); +if (suppressEnvProfile) { + if (originalEnvProfile === undefined) { + delete process.env.PAIROFCLEATS_PROFILE; + } else { + process.env.PAIROFCLEATS_PROFILE = originalEnvProfile; + } +} +const runtimeConfig = getRuntimeConfig(runtimeRoot, userConfig); +const embeddingProvider = userConfig.indexing?.embeddings?.provider || 'xenova'; +const needsMemory = backends.includes('memory'); +const needsSqlite = backends.some((entry) => entry.startsWith('sqlite')); +const hasIndex = (mode) => { + const dir = getIndexDir(runtimeRoot, mode, userConfig); + const metaPaths = [ + 'chunk_meta.json', + 'chunk_meta.jsonl', + 'chunk_meta.meta.json', + 'chunk_meta.parts' + ]; + return metaPaths.some((entry) => fsSync.existsSync(path.join(dir, entry))); +}; +const hasSqliteIndex = (mode) => { + const paths = resolveSqlitePaths(runtimeRoot, userConfig); + const target = mode === 'prose' ? paths.prosePath : paths.codePath; + return fsSync.existsSync(target); +}; +if (needsMemory && !buildIndex && (!hasIndex('code') || !hasIndex('prose'))) { + buildIndex = true; + if (!jsonOutput) { + console.log('[bench] Missing file-backed index; enabling build-index.'); + } +} +if (needsSqlite && !buildSqlite && (!hasSqliteIndex('code') || !hasSqliteIndex('prose'))) { + buildSqlite = true; + if (!jsonOutput) { + console.log('[bench] Missing sqlite index; enabling build-sqlite.'); + } +} +if (buildSqlite && !buildIndex) buildIndex = true; +const heapArgRaw = argv['heap-mb']; +const heapArg = Number.isFinite(Number(heapArgRaw)) ? Math.floor(Number(heapArgRaw)) : null; +const heapRecommendation = getRecommendedHeapMb(); +const baseNodeOptions = stripMaxOldSpaceFlag(process.env.NODE_OPTIONS || ''); +const hasHeapFlag = baseNodeOptions.includes('--max-old-space-size'); +let heapOverride = null; +if (Number.isFinite(heapArg) && heapArg > 0) { + heapOverride = heapArg; +} else if ( + !Number.isFinite(runtimeConfig.maxOldSpaceMb) + && !envConfig.maxOldSpaceMb + && !hasHeapFlag +) { + heapOverride = heapRecommendation.recommendedMb; +} +const runtimeConfigForRun = heapOverride + ? { ...runtimeConfig, maxOldSpaceMb: heapOverride } + : runtimeConfig; +const envStubEmbeddings = envConfig.embeddings === 'stub'; +const realEmbeddings = argv['real-embeddings'] === true; +const stubEmbeddings = argv['stub-embeddings'] === true + || (!realEmbeddings && envStubEmbeddings); +const baseEnvInput = { ...process.env }; +if (baseNodeOptions) { + baseEnvInput.NODE_OPTIONS = baseNodeOptions; +} else { + delete baseEnvInput.NODE_OPTIONS; +} +const baseEnv = resolveRuntimeEnv(runtimeConfigForRun, baseEnvInput); +const profileArgPresent = rawArgs.includes('--profile') || rawArgs.includes('--index-profile'); +if (noIndexProfile && !profileArgPresent && baseEnv.PAIROFCLEATS_PROFILE) { + delete baseEnv.PAIROFCLEATS_PROFILE; +} +if (realEmbeddings && baseEnv.PAIROFCLEATS_EMBEDDINGS) { + delete baseEnv.PAIROFCLEATS_EMBEDDINGS; +} +if (heapOverride) { + baseEnv.PAIROFCLEATS_MAX_OLD_SPACE_MB = String(heapOverride); + if (!jsonOutput) { + console.log( + `[bench] heap ${formatGb(heapOverride)} (${heapOverride} MB) ` + + `(override with --heap-mb or PAIROFCLEATS_MAX_OLD_SPACE_MB)` + ); + } +} +const benchEnvWithProfile = indexProfileRaw + ? 
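+  // forward the explicit profile to child build/search processes via PAIROFCLEATS_PROFILE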
{ ...baseEnv, PAIROFCLEATS_PROFILE: indexProfileRaw } + : baseEnv; + +function logBench(message) { + if (!message) return; + if (jsonOutput) { + process.stderr.write(`${message}\n`); + } else { + console.log(message); + } +} function runSearch(query, backend) { const args = [ searchPath, query, '--json', + '--json-compact', '--stats', '--backend', backend, @@ -73,17 +219,47 @@ function runSearch(query, backend) { String(topN), annArg ]; + if (repoArg) args.push('--repo', repoArg); if (bm25K1Arg) args.push('--bm25-k1', String(bm25K1Arg)); if (bm25BArg) args.push('--bm25-b', String(bm25BArg)); if (ftsProfileArg) args.push('--fts-profile', String(ftsProfileArg)); if (ftsWeightsArg) args.push('--fts-weights', String(ftsWeightsArg)); - const result = spawnSync(process.execPath, args, { encoding: 'utf8' }); - if (result.status !== 0) { - console.error(`Search failed for backend=${backend} query="${query}"`); - if (result.stderr) console.error(result.stderr.trim()); - process.exit(result.status ?? 1); + const env = { ...benchEnvWithProfile }; + if (stubEmbeddings) { + env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + } else { + delete env.PAIROFCLEATS_EMBEDDINGS; } - return JSON.parse(result.stdout || '{}'); + return new Promise((resolve) => { + const child = spawn(process.execPath, args, { env, stdio: ['ignore', 'pipe', 'pipe'] }); + let stdout = ''; + let stderr = ''; + child.stdout.on('data', (chunk) => { + stdout += chunk; + }); + child.stderr.on('data', (chunk) => { + stderr += chunk; + }); + child.on('error', (err) => { + console.error(`Search failed to start for backend=${backend} query="${query}"`); + if (err?.message) console.error(err.message); + process.exit(1); + }); + child.on('close', (code) => { + if (code !== 0) { + console.error(`Search failed for backend=${backend} query="${query}"`); + if (stderr) console.error(stderr.trim()); + process.exit(code ?? 1); + } + try { + resolve(JSON.parse(stdout || '{}')); + } catch (err) { + console.error(`Search response parse failed for backend=${backend} query="${query}"`); + if (stderr) console.error(stderr.trim()); + process.exit(1); + } + }); + }); } function mean(values) { @@ -91,27 +267,88 @@ function mean(values) { return values.reduce((a, b) => a + b, 0) / values.length; } -function percentile(sortedValues, pct) { - if (!sortedValues.length) return 0; - const idx = Math.min(sortedValues.length - 1, Math.max(0, Math.floor((pct / 100) * (sortedValues.length - 1)))); - return sortedValues[idx]; +function buildPercentileHistogram(values, scale) { + if (!values.length) return null; + const scaled = values.map((value) => Math.max(1, Math.round(value * scale))); + const maxValue = Math.max(...scaled, 1); + const histogram = buildHistogram({ + lowestDiscernibleValue: 1, + highestTrackableValue: maxValue, + numberOfSignificantValueDigits: 3 + }); + scaled.forEach((value) => histogram.recordValue(value)); + return histogram; } -function buildStats(values) { - if (!values.length) return { mean: 0, p50: 0, p95: 0, min: 0, max: 0 }; - const sorted = [...values].sort((a, b) => a - b); +function buildStats(values, { scale = 1 } = {}) { + if (!values.length) return { mean: 0, p50: 0, p95: 0, p99: 0, min: 0, max: 0 }; + const histogram = buildPercentileHistogram(values, scale); + const pct = (value) => (histogram ? 
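+  // samples were scaled to integers >= 1 before recording, so divide the percentile back by the same scale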
histogram.getValueAtPercentile(value) / scale : 0); return { mean: mean(values), - p50: percentile(sorted, 50), - p95: percentile(sorted, 95), - min: sorted[0], - max: sorted[sorted.length - 1] + p50: pct(50), + p95: pct(95), + p99: pct(99), + min: Math.min(...values), + max: Math.max(...values) + }; +} + +function stripMaxOldSpaceFlag(options) { + if (!options) return ''; + return options + .replace(/--max-old-space-size=\d+/g, '') + .replace(/\s+/g, ' ') + .trim(); +} + +function formatGb(mb) { + return `${(mb / 1024).toFixed(1)} GB`; +} + +function formatDuration(ms) { + const total = Math.max(0, Math.floor(ms / 1000)); + const hours = Math.floor(total / 3600); + const minutes = Math.floor((total % 3600) / 60); + const seconds = total % 60; + if (hours > 0) return `${hours}h ${minutes}m ${seconds}s`; + if (minutes > 0) return `${minutes}m ${seconds}s`; + return `${seconds}s`; +} + +function formatDurationMs(ms) { + if (!Number.isFinite(ms)) return 'n/a'; + if (ms < 1000) return `${Math.max(0, Math.round(ms))}ms`; + return formatDuration(ms); +} + +function formatRate(value, unit) { + if (!Number.isFinite(value)) return 'n/a'; + const rounded = value >= 100 ? value.toFixed(0) : value >= 10 ? value.toFixed(1) : value.toFixed(2); + return `${rounded} ${unit}/s`; +} + +function getRecommendedHeapMb() { + const totalMb = Math.floor(os.totalmem() / (1024 * 1024)); + const recommended = Math.max(4096, Math.floor(totalMb * 0.75)); + const rounded = Math.floor(recommended / 256) * 256; + return { + totalMb, + recommendedMb: Math.max(4096, rounded) }; } function runBuild(args, label, env) { const start = Date.now(); - const result = spawnSync(process.execPath, args, { env, stdio: 'inherit' }); + const result = spawnSync(process.execPath, args, { + env, + encoding: 'utf8', + stdio: jsonOutput ? ['ignore', 'pipe', 'pipe'] : 'inherit' + }); + if (jsonOutput) { + if (result.stdout) process.stderr.write(result.stdout); + if (result.stderr) process.stderr.write(result.stderr); + } if (result.status !== 0) { console.error(`Build failed: ${label}`); process.exit(result.status ?? 1); @@ -119,98 +356,322 @@ function runBuild(args, label, env) { return Date.now() - start; } +function runServiceQueue(queueName, env) { + const args = [indexerServicePath, 'work', '--queue', queueName, '--concurrency', '1']; + const result = spawnSync(process.execPath, args, { + env, + encoding: 'utf8', + stdio: jsonOutput ? ['ignore', 'pipe', 'pipe'] : 'inherit' + }); + if (jsonOutput) { + if (result.stdout) process.stderr.write(result.stdout); + if (result.stderr) process.stderr.write(result.stderr); + } + if (result.status !== 0) { + console.error(`Service queue failed: ${queueName}`); + process.exit(result.status ?? 1); + } +} + const buildMs = {}; if (buildIndex || buildSqlite) { - const buildEnv = { ...process.env }; - if (stubEmbeddings) buildEnv.PAIROFCLEATS_EMBEDDINGS = 'stub'; + const buildEnv = { ...benchEnvWithProfile }; + if (Number.isFinite(Number(argv.threads)) && Number(argv.threads) > 0) { + buildEnv.PAIROFCLEATS_THREADS = String(argv.threads); + } + if (stubEmbeddings) { + buildEnv.PAIROFCLEATS_EMBEDDINGS = 'stub'; + } else { + delete buildEnv.PAIROFCLEATS_EMBEDDINGS; + } + const twoStageConfig = userConfig.indexing?.twoStage || {}; + const useStageQueue = twoStageConfig.enabled === true + && twoStageConfig.background === true + && twoStageConfig.queue !== false; + const embeddingMode = typeof userConfig.indexing?.embeddings?.mode === 'string' + ? 
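+    // Trim and lowercase so the 'service' comparison below tolerates config formatting.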
userConfig.indexing.embeddings.mode.trim().toLowerCase() + : ''; + const embeddingsEnabled = userConfig.indexing?.embeddings?.enabled !== false; + const useEmbeddingService = embeddingsEnabled && embeddingMode === 'service'; if (buildIndex) { const args = [buildIndexPath]; + if (repoArg) args.push('--repo', repoArg); if (stubEmbeddings) args.push('--stub-embeddings'); if (buildIncremental) args.push('--incremental'); + if (argv.threads) args.push('--threads', String(argv.threads)); buildMs.index = runBuild(args, 'build index', buildEnv); + if (useStageQueue) { + runServiceQueue('index', buildEnv); + logBench('[bench] Stage2 enrichment complete; continuing with benchmark queries.'); + } } if (buildSqlite) { const args = [buildSqlitePath]; + if (repoArg) args.push('--repo', repoArg); if (buildIncremental) args.push('--incremental'); buildMs.sqlite = runBuild(args, 'build sqlite', buildEnv); } + if (buildIndex && useEmbeddingService) { + runServiceQueue('embeddings', buildEnv); + } } -const latency = {}; -const memoryRss = {}; -const hitCounts = {}; -const resultCounts = {}; -for (const backend of backends) { - latency[backend] = []; - memoryRss[backend] = []; - hitCounts[backend] = 0; - resultCounts[backend] = []; +const queryTasks = []; +let queryIndex = 0; +for (const query of selectedQueries) { + queryIndex += 1; + for (const backend of backends) { + queryTasks.push({ query, backend, queryIndex }); + } } -for (const query of selectedQueries) { +const queryConcurrencyRaw = Number(argv['query-concurrency']); +const queryConcurrencyList = Number.isFinite(queryConcurrencyRaw) && queryConcurrencyRaw > 0 + ? [Math.floor(queryConcurrencyRaw)] + : [4]; + +const runQueries = async (requestedConcurrency) => { + const latency = {}; + const memoryRss = {}; + const hitCounts = {}; + const resultCounts = {}; for (const backend of backends) { - const payload = runSearch(query, backend); - latency[backend].push(payload.stats?.elapsedMs || 0); + latency[backend] = []; + memoryRss[backend] = []; + hitCounts[backend] = 0; + resultCounts[backend] = []; + } + + const totalSearches = selectedQueries.length * backends.length; + const queryProgress = { + count: 0, + startMs: Date.now(), + lastLogMs: 0, + lastPct: 0 + }; + const logQueryProgress = (force = false) => { + if (!totalSearches) return; + const now = Date.now(); + const pct = (queryProgress.count / totalSearches) * 100; + const elapsedMs = now - queryProgress.startMs; + const rate = elapsedMs > 0 ? queryProgress.count / (elapsedMs / 1000) : 0; + const remaining = totalSearches - queryProgress.count; + const etaMs = rate > 0 && remaining > 0 ? (remaining / rate) * 1000 : 0; + const shouldLog = force + || queryProgress.count === totalSearches + || now - queryProgress.lastLogMs >= 10000 + || pct - queryProgress.lastPct >= 5; + if (!shouldLog) return; + const elapsedText = formatDuration(elapsedMs); + const avgSearchText = queryProgress.count + ? formatDurationMs(elapsedMs / queryProgress.count) + : 'n/a'; + const avgQueryText = selectedQueries.length + ? formatDurationMs(elapsedMs / selectedQueries.length) + : 'n/a'; + const etaText = etaMs > 0 ? 
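+      // ETA extrapolates from the observed search rate; shown as n/a until there is enough data.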
formatDuration(etaMs) : 'n/a'; + logBench( + `[bench] Queries ${queryProgress.count}/${totalSearches} (${pct.toFixed(1)}%) | ` + + `concurrency ${requestedConcurrency} | elapsed ${elapsedText} | ` + + `avg/search ${avgSearchText} | avg/query ${avgQueryText} | eta ${etaText}` + ); + queryProgress.lastLogMs = now; + queryProgress.lastPct = pct; + }; + + logBench( + `[bench] Running ${selectedQueries.length} queries across ${backends.length} backends ` + + `(${totalSearches} searches) with concurrency ${requestedConcurrency}.` + ); + logQueryProgress(true); + + const loggedQueries = new Set(); + const runQueryTask = async (task) => { + if (!loggedQueries.has(task.queryIndex)) { + loggedQueries.add(task.queryIndex); + logBench( + `[bench] (concurrency ${requestedConcurrency}) Query ` + + `${task.queryIndex}/${selectedQueries.length}: ${task.query}` + ); + } + const payload = await runSearch(task.query, task.backend); + queryProgress.count += 1; + logQueryProgress(); + const elapsedMs = Number(payload.stats?.elapsedMs); + if (!Number.isFinite(elapsedMs)) { + console.error(`[bench] Missing timing stats for backend=${task.backend} query="${task.query}"`); + process.exit(1); + } + latency[task.backend].push(elapsedMs); const codeHits = Array.isArray(payload.code) ? payload.code.length : 0; const proseHits = Array.isArray(payload.prose) ? payload.prose.length : 0; const totalHits = codeHits + proseHits; - resultCounts[backend].push(totalHits); - if (totalHits > 0) hitCounts[backend] += 1; + resultCounts[task.backend].push(totalHits); + if (totalHits > 0) hitCounts[task.backend] += 1; const rss = payload.stats?.memory?.rss; - if (Number.isFinite(rss)) memoryRss[backend].push(rss); + if (Number.isFinite(rss)) memoryRss[task.backend].push(rss); + }; + if (queryTasks.length) { + await runWithConcurrency( + queryTasks, + Math.max(1, Math.min(requestedConcurrency, queryTasks.length)), + runQueryTask + ); } + logQueryProgress(true); + const queryWallMs = Date.now() - queryProgress.startMs; + const queryWallMsPerSearch = totalSearches ? queryWallMs / totalSearches : 0; + const queryWallMsPerQuery = selectedQueries.length ? queryWallMs / selectedQueries.length : 0; + + const latencyStats = Object.fromEntries(backends.map((b) => [b, buildStats(latency[b], { scale: 1000 })])); + const memoryStats = Object.fromEntries(backends.map((b) => [b, buildStats(memoryRss[b], { scale: 1 })])); + const hitRate = Object.fromEntries(backends.map((b) => [ + b, + selectedQueries.length ? hitCounts[b] / selectedQueries.length : 0 + ])); + const resultCountAvg = Object.fromEntries(backends.map((b) => [b, mean(resultCounts[b])])); + + const summary = { + queries: selectedQueries.length, + topN, + annEnabled, + embeddingProvider, + backends, + queryConcurrency: requestedConcurrency, + queryWallMs, + queryWallMsPerSearch, + queryWallMsPerQuery, + latencyMsAvg: Object.fromEntries(backends.map((b) => [b, latencyStats[b].mean])), + latencyMs: latencyStats, + hitRate, + resultCountAvg, + memoryRss: memoryStats, + buildMs: Object.keys(buildMs).length ? buildMs : null + }; + + return { summary }; +}; + +const runs = []; +for (const concurrency of queryConcurrencyList) { + runs.push(await runQueries(concurrency)); } -const reportResult = spawnSync(process.execPath, [reportPath, '--json'], { encoding: 'utf8' }); +const reportArgs = [reportPath, '--json']; +if (repoArg) reportArgs.push('--repo', repoArg); +const reportResult = spawnSync(process.execPath, reportArgs, { encoding: 'utf8' }); const artifactReport = reportResult.status === 0 ? 
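+  // Parse the artifact report on success; degrade to an empty report on failure instead of aborting the bench.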
JSON.parse(reportResult.stdout || '{}') : {};
+const corruption = artifactReport?.corruption || null;
+if (corruption && corruption.ok === false) {
+  const issues = Array.isArray(corruption.issues) && corruption.issues.length
+    ? corruption.issues.join('; ')
+    : 'unknown issues';
+  console.error(`[bench] Artifact corruption check failed: ${issues}`);
+  process.exit(1);
+}
 
-const latencyStats = Object.fromEntries(backends.map((b) => [b, buildStats(latency[b])]));
-const memoryStats = Object.fromEntries(backends.map((b) => [b, buildStats(memoryRss[b])]));
-const hitRate = Object.fromEntries(backends.map((b) => [
-  b,
-  selectedQueries.length ? hitCounts[b] / selectedQueries.length : 0
-]));
-const resultCountAvg = Object.fromEntries(backends.map((b) => [b, mean(resultCounts[b])]));
-const summary = {
-  queries: selectedQueries.length,
-  topN,
-  annEnabled,
-  backends,
-  latencyMsAvg: Object.fromEntries(backends.map((b) => [b, latencyStats[b].mean])),
-  latencyMs: latencyStats,
-  hitRate,
-  resultCountAvg,
-  memoryRss: memoryStats,
-  buildMs: Object.keys(buildMs).length ? buildMs : null
-};
+const summaries = runs.map((run) => run.summary).filter(Boolean);
+const concurrencyStats = {};
+for (const runSummary of summaries) {
+  const concurrency = runSummary?.queryConcurrency;
+  if (Number.isFinite(concurrency)) {
+    concurrencyStats[String(concurrency)] = {
+      latencyMsAvg: runSummary.latencyMsAvg,
+      latencyMs: runSummary.latencyMs,
+      hitRate: runSummary.hitRate,
+      resultCountAvg: runSummary.resultCountAvg,
+      memoryRss: runSummary.memoryRss
+    };
+  }
+}
+const summary = summaries[0]
+  ? {
+    ...summaries[0],
+    ...(Object.keys(concurrencyStats).length ? { concurrencyStats } : {})
+  }
+  : null;
 
 const output = {
   generatedAt: new Date().toISOString(),
+  repo: { root: repoArg || root },
   summary,
+  runs: summaries,
   artifacts: artifactReport
 };
 
 if (argv.json) {
   console.log(JSON.stringify(output, null, 2));
 } else {
-  console.log('Benchmark summary');
-  console.log(`- Queries: ${summary.queries}`);
-  console.log(`- TopN: ${summary.topN}`);
-  console.log(`- Ann: ${summary.annEnabled}`);
-  for (const backend of backends) {
-    const stats = latencyStats[backend];
-    console.log(`- ${backend} avg ms: ${stats.mean.toFixed(1)} (p95 ${stats.p95.toFixed(1)})`);
-    console.log(`- ${backend} hit rate: ${(hitRate[backend] * 100).toFixed(1)}% (avg hits ${resultCountAvg[backend].toFixed(1)})`);
-    const mem = memoryStats[backend];
-    if (mem && mem.mean) {
-      console.log(`- ${backend} rss avg mb: ${(mem.mean / (1024 * 1024)).toFixed(1)} (p95 ${(mem.p95 / (1024 * 1024)).toFixed(1)})`);
+  for (const runSummary of summaries) {
+    if (!runSummary) continue;
+    const concurrencyLabel = Number.isFinite(runSummary.queryConcurrency)
+      ?
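+      // Label the summary heading with the run's query concurrency when it is numeric.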
` (concurrency ${runSummary.queryConcurrency})` + : ''; + console.log(`Benchmark summary${concurrencyLabel}`); + console.log(`- Queries: ${runSummary.queries}`); + console.log(`- TopN: ${runSummary.topN}`); + console.log(`- Ann: ${runSummary.annEnabled}`); + if (Number.isFinite(runSummary.queryWallMs)) { + console.log( + `- Query wall time: ${formatDuration(runSummary.queryWallMs)} ` + + `(avg/search ${formatDurationMs(runSummary.queryWallMsPerSearch)}, ` + + `avg/query ${formatDurationMs(runSummary.queryWallMsPerQuery)})` + ); + } + for (const backend of runSummary.backends || backends) { + const stats = runSummary.latencyMs?.[backend]; + if (stats) { + console.log(`- ${backend} avg ms: ${stats.mean.toFixed(1)} (p95 ${stats.p95.toFixed(1)}, p99 ${stats.p99.toFixed(1)})`); + } + const hitRate = runSummary.hitRate?.[backend]; + const resultCount = runSummary.resultCountAvg?.[backend]; + if (Number.isFinite(hitRate) && Number.isFinite(resultCount)) { + console.log(`- ${backend} hit rate: ${(hitRate * 100).toFixed(1)}% (avg hits ${resultCount.toFixed(1)})`); + } + const mem = runSummary.memoryRss?.[backend]; + if (mem && mem.mean) { + console.log(`- ${backend} rss avg mb: ${(mem.mean / (1024 * 1024)).toFixed(1)} (p95 ${(mem.p95 / (1024 * 1024)).toFixed(1)}, p99 ${(mem.p99 / (1024 * 1024)).toFixed(1)})`); + } + } + if (runSummary.buildMs?.index) { + console.log(`- build index ms: ${runSummary.buildMs.index.toFixed(0)}`); + } + if (runSummary.buildMs?.sqlite) { + console.log(`- build sqlite ms: ${runSummary.buildMs.sqlite.toFixed(0)}`); + } + const throughput = artifactReport?.throughput || null; + if (throughput?.code) { + const codeThroughput = throughput.code; + console.log( + `- throughput code: ${formatRate(codeThroughput.chunksPerSec, 'chunks')}, ` + + `${formatRate(codeThroughput.tokensPerSec, 'tokens')}, ` + + `${formatRate(codeThroughput.bytesPerSec, 'bytes')}` + ); + } + if (throughput?.prose) { + const proseThroughput = throughput.prose; + console.log( + `- throughput prose: ${formatRate(proseThroughput.chunksPerSec, 'chunks')}, ` + + `${formatRate(proseThroughput.tokensPerSec, 'tokens')}, ` + + `${formatRate(proseThroughput.bytesPerSec, 'bytes')}` + ); + } + if (throughput?.lmdb?.code) { + const lmdbCode = throughput.lmdb.code; + console.log( + `- throughput lmdb code: ${formatRate(lmdbCode.chunksPerSec, 'chunks')}, ` + + `${formatRate(lmdbCode.tokensPerSec, 'tokens')}, ` + + `${formatRate(lmdbCode.bytesPerSec, 'bytes')}` + ); + } + if (throughput?.lmdb?.prose) { + const lmdbProse = throughput.lmdb.prose; + console.log( + `- throughput lmdb prose: ${formatRate(lmdbProse.chunksPerSec, 'chunks')}, ` + + `${formatRate(lmdbProse.tokensPerSec, 'tokens')}, ` + + `${formatRate(lmdbProse.bytesPerSec, 'bytes')}` + ); } - } - if (buildMs.index) { - console.log(`- build index ms: ${buildMs.index.toFixed(0)}`); - } - if (buildMs.sqlite) { - console.log(`- build sqlite ms: ${buildMs.sqlite.toFixed(0)}`); } } diff --git a/tests/build-embeddings-cache.js b/tests/build-embeddings-cache.js new file mode 100644 index 000000000..a689445ec --- /dev/null +++ b/tests/build-embeddings-cache.js @@ -0,0 +1,63 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getRepoCacheRoot } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'build-embeddings-cache'); +const repoRoot = path.join(tempRoot, 'repo'); +const 
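+// A dedicated cache root keeps this test isolated from the real repo cache.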
cacheRoot = path.join(tempRoot, 'cache');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true });
+await fsPromises.mkdir(cacheRoot, { recursive: true });
+
+await fsPromises.writeFile(
+  path.join(repoRoot, 'src', 'alpha.js'),
+  'export const alpha = () => 1;\n'
+);
+await fsPromises.writeFile(
+  path.join(repoRoot, '.pairofcleats.json'),
+  JSON.stringify({ indexing: { treeSitter: { enabled: false } } }, null, 2)
+);
+
+const env = {
+  ...process.env,
+  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
+  PAIROFCLEATS_EMBEDDINGS: 'stub'
+};
+
+const runNode = (label, args) => {
+  const result = spawnSync(process.execPath, args, { cwd: repoRoot, env, stdio: 'inherit' });
+  if (result.status !== 0) {
+    console.error(`Failed: ${label}`);
+    process.exit(result.status ?? 1);
+  }
+};
+
+runNode('build_index', [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot]);
+runNode('build_embeddings', [path.join(root, 'tools', 'build-embeddings.js'), '--stub-embeddings', '--mode', 'code', '--repo', repoRoot]);
+
+const repoCacheRoot = getRepoCacheRoot(repoRoot, { cache: { root: cacheRoot } });
+const cacheDir = path.join(repoCacheRoot, 'embeddings', 'code', 'files');
+const cacheFiles = fs.existsSync(cacheDir)
+  ? fs.readdirSync(cacheDir).filter((name) => name.endsWith('.json'))
+  : [];
+if (!cacheFiles.length) {
+  console.error('Expected embedding cache files to be created');
+  process.exit(1);
+}
+const cachePath = path.join(cacheDir, cacheFiles[0]);
+const before = await fsPromises.stat(cachePath);
+
+runNode('build_embeddings cached', [path.join(root, 'tools', 'build-embeddings.js'), '--stub-embeddings', '--mode', 'code', '--repo', repoRoot]);
+
+const after = await fsPromises.stat(cachePath);
+if (after.mtimeMs !== before.mtimeMs) {
+  console.error('Expected embedding cache file to be reused without rewrite');
+  process.exit(1);
+}
+
+console.log('embedding cache reuse test passed');
diff --git a/tests/build-index-all.js b/tests/build-index-all.js
new file mode 100644
index 000000000..4b49d1fb8
--- /dev/null
+++ b/tests/build-index-all.js
@@ -0,0 +1,57 @@
+#!/usr/bin/env node
+import fs from 'node:fs';
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js';
+
+const root = process.cwd();
+const tempRoot = path.join(root, 'tests', '.cache', 'build-index-all');
+const repoRoot = path.join(tempRoot, 'repo');
+const cacheRoot = path.join(tempRoot, 'cache');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(repoRoot, { recursive: true });
+await fsPromises.mkdir(cacheRoot, { recursive: true });
+
+await fsPromises.writeFile(path.join(repoRoot, 'alpha.js'), 'const alpha = 1;\n');
+await fsPromises.writeFile(path.join(repoRoot, 'beta.md'), '# Beta\n');
+await fsPromises.writeFile(
+  path.join(repoRoot, '.pairofcleats.json'),
+  JSON.stringify({ indexing: { treeSitter: { enabled: false } } }, null, 2)
+);
+
+const env = {
+  ...process.env,
+  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
+  PAIROFCLEATS_EMBEDDINGS: 'stub'
+};
+
+const result = spawnSync(
+  process.execPath,
+  [path.join(root, 'build_index.js'), '--mode', 'all', '--stub-embeddings', '--repo', repoRoot],
+  { env, stdio: 'inherit' }
+);
+if (result.status !== 0) {
+  console.error('Failed: build_index --mode all');
+  process.exit(result.status ??
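+  // status is null when the child exits via a signal, so fall back to exit code 1.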
1); +} + +const userConfig = loadUserConfig(repoRoot); +const modes = ['code', 'prose', 'extracted-prose']; +const hasChunkMeta = (dir) => ( + fs.existsSync(path.join(dir, 'chunk_meta.json')) + || fs.existsSync(path.join(dir, 'chunk_meta.jsonl')) + || fs.existsSync(path.join(dir, 'chunk_meta.meta.json')) + || fs.existsSync(path.join(dir, 'chunk_meta.parts')) +); + +for (const mode of modes) { + const dir = getIndexDir(repoRoot, mode, userConfig); + if (!hasChunkMeta(dir)) { + console.error(`Expected chunk metadata for ${mode} in ${dir}`); + process.exit(1); + } +} + +console.log('build-index --mode all test passed'); diff --git a/tests/build-runtime/content-hash.test.js b/tests/build-runtime/content-hash.test.js new file mode 100644 index 000000000..db96a3ae4 --- /dev/null +++ b/tests/build-runtime/content-hash.test.js @@ -0,0 +1,67 @@ +#!/usr/bin/env node +import { buildContentConfigHash, normalizeContentConfig } from '../../src/index/build/runtime/hash.js'; + +const fail = (message) => { + console.error(message); + process.exit(1); +}; + +const config = { + indexing: { + concurrency: 12, + importConcurrency: 4, + workerPool: { enabled: true }, + debugCrash: true, + shards: { enabled: true }, + fileListSampleSize: 123, + maxFileBytes: 2048 + } +}; + +const normalized = normalizeContentConfig(config); +if (!normalized.indexing || normalized.indexing.maxFileBytes !== 2048) { + fail('normalizeContentConfig should preserve relevant indexing fields.'); +} +for (const key of ['concurrency', 'importConcurrency', 'workerPool', 'debugCrash', 'shards', 'fileListSampleSize']) { + if (normalized.indexing[key] !== undefined) { + fail(`normalizeContentConfig should remove indexing.${key}.`); + } +} + +const envA = { cacheRoot: '/tmp/a', stage: 'stage1' }; +const envB = { cacheRoot: '/tmp/b', stage: 'stage1' }; +const hashA = buildContentConfigHash(config, envA); +const hashB = buildContentConfigHash(config, envB); +if (hashA !== hashB) { + fail('buildContentConfigHash should ignore cacheRoot differences.'); +} + +const configVariant = { + indexing: { + concurrency: 1, + importConcurrency: 2, + maxFileBytes: 2048 + } +}; +const hashC = buildContentConfigHash(configVariant, envA); +if (hashA !== hashC) { + fail('buildContentConfigHash should ignore concurrency-only changes.'); +} + +const envC = { cacheRoot: '/tmp/a', stage: 'stage2' }; +const hashD = buildContentConfigHash(config, envC); +if (hashA === hashD) { + fail('buildContentConfigHash should change when env fields change.'); +} + +const configDiff = { + indexing: { + maxFileBytes: 4096 + } +}; +const hashE = buildContentConfigHash(configDiff, envA); +if (hashA === hashE) { + fail('buildContentConfigHash should change when config fields change.'); +} + +console.log('build runtime content hash tests passed'); diff --git a/tests/build-runtime/stage-overrides.test.js b/tests/build-runtime/stage-overrides.test.js new file mode 100644 index 000000000..088a85868 --- /dev/null +++ b/tests/build-runtime/stage-overrides.test.js @@ -0,0 +1,53 @@ +#!/usr/bin/env node +import { buildStageOverrides, normalizeStage } from '../../src/index/build/runtime/stage.js'; + +const fail = (message) => { + console.error(message); + process.exit(1); +}; + +if (normalizeStage('stage1') !== 'stage1') { + fail('normalizeStage should match stage1.'); +} +if (normalizeStage('embed') !== 'stage3') { + fail('normalizeStage should map embed to stage3.'); +} +if (normalizeStage('ann') !== 'stage4') { + fail('normalizeStage should map ann to stage4.'); +} +if (normalizeStage('') !== 
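+// An empty stage name has no mapping, so normalizeStage must return null.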
null) { + fail('normalizeStage should return null for empty input.'); +} + +const stage1Overrides = buildStageOverrides({ stage1: { lint: true } }, 'stage1'); +if (!stage1Overrides || stage1Overrides.lint !== true) { + fail('stage1 overrides should merge explicit values.'); +} +if (stage1Overrides.embeddings?.enabled !== false) { + fail('stage1 overrides should disable embeddings.'); +} +if (stage1Overrides.treeSitter?.enabled !== false) { + fail('stage1 overrides should disable tree-sitter.'); +} +if (stage1Overrides.typeInference !== false) { + fail('stage1 overrides should disable type inference.'); +} + +const stage2Overrides = buildStageOverrides({ stage2: { lint: false, embeddings: { enabled: true } } }, 'stage2'); +if (!stage2Overrides || stage2Overrides.embeddings?.enabled !== true) { + fail('stage2 overrides should preserve explicit embeddings config.'); +} + +const stage3Overrides = buildStageOverrides({ stage3: { lint: true } }, 'stage3'); +if (!stage3Overrides || stage3Overrides.lint !== true) { + fail('stage3 overrides should merge explicit values.'); +} +if (stage3Overrides.treeSitter?.enabled !== false) { + fail('stage3 overrides should disable tree-sitter.'); +} + +if (buildStageOverrides({}, 'unknown') !== null) { + fail('buildStageOverrides should return null for unknown stages.'); +} + +console.log('build runtime stage overrides tests passed'); diff --git a/tests/cache-lru.js b/tests/cache-lru.js new file mode 100644 index 000000000..be8ba225f --- /dev/null +++ b/tests/cache-lru.js @@ -0,0 +1,31 @@ +import assert from 'node:assert/strict'; +import { createLruCache, estimateStringBytes } from '../src/shared/cache.js'; + +const sizeCache = createLruCache({ + name: 'size-test', + maxMb: 0.0001, + ttlMs: 0, + sizeCalculation: estimateStringBytes +}); + +sizeCache.set('a', 'a'.repeat(80)); +sizeCache.set('b', 'b'.repeat(80)); + +const hasA = sizeCache.get('a') !== null; +const hasB = sizeCache.get('b') !== null; +assert.ok(!(hasA && hasB), 'expected size-based eviction'); +assert.ok(sizeCache.stats.evictions >= 1, 'expected at least one eviction'); + +const ttlCache = createLruCache({ + name: 'ttl-test', + maxMb: 1, + ttlMs: 10, + sizeCalculation: estimateStringBytes +}); + +ttlCache.set('x', 'value'); +await new Promise((resolve) => setTimeout(resolve, 25)); +const expired = ttlCache.get('x'); +assert.equal(expired, null, 'expected ttl-based expiration'); + +console.log('cache lru test passed'); diff --git a/tests/capabilities-report.js b/tests/capabilities-report.js new file mode 100644 index 000000000..9feccbc6f --- /dev/null +++ b/tests/capabilities-report.js @@ -0,0 +1,23 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { getCapabilities } from '../src/shared/capabilities.js'; + +const caps = getCapabilities({ refresh: true }); + +assert.ok(caps && typeof caps === 'object', 'capabilities should be an object'); +assert.equal(typeof caps.watcher?.chokidar, 'boolean', 'watcher.chokidar should be boolean'); +assert.equal(typeof caps.watcher?.parcel, 'boolean', 'watcher.parcel should be boolean'); +assert.equal(typeof caps.regex?.re2, 'boolean', 'regex.re2 should be boolean'); +assert.equal(typeof caps.regex?.re2js, 'boolean', 'regex.re2js should be boolean'); +assert.equal(typeof caps.hash?.nodeRsXxhash, 'boolean', 'hash.nodeRsXxhash should be boolean'); +assert.equal(typeof caps.hash?.wasmXxhash, 'boolean', 'hash.wasmXxhash should be boolean'); +assert.equal(typeof caps.compression?.gzip, 'boolean', 'compression.gzip should be boolean'); 
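+// Each probe should report a boolean whether or not the optional dependency is installed.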
+assert.equal(typeof caps.compression?.zstd, 'boolean', 'compression.zstd should be boolean'); +assert.equal(typeof caps.extractors?.pdf, 'boolean', 'extractors.pdf should be boolean'); +assert.equal(typeof caps.extractors?.docx, 'boolean', 'extractors.docx should be boolean'); +assert.equal(typeof caps.mcp?.legacy, 'boolean', 'mcp.legacy should be boolean'); +assert.equal(typeof caps.mcp?.sdk, 'boolean', 'mcp.sdk should be boolean'); +assert.equal(typeof caps.externalBackends?.tantivy, 'boolean', 'externalBackends.tantivy should be boolean'); +assert.equal(typeof caps.externalBackends?.lancedb, 'boolean', 'externalBackends.lancedb should be boolean'); + +console.log('capabilities report tests passed'); diff --git a/tests/chargram-guardrails.js b/tests/chargram-guardrails.js new file mode 100644 index 000000000..83aaaaa85 --- /dev/null +++ b/tests/chargram-guardrails.js @@ -0,0 +1,47 @@ +#!/usr/bin/env node +import { createTokenizationContext, tokenizeChunkText } from '../src/index/build/tokenization.js'; +import { tri } from '../src/shared/tokenize.js'; + +const context = createTokenizationContext({ + dictWords: new Set(), + dictConfig: { segmentation: 'greedy' }, + postingsConfig: { + enableChargrams: true, + chargramMinN: 3, + chargramMaxN: 3, + chargramMaxTokenLength: 5, + chargramSource: 'full' + } +}); + +const payload = tokenizeChunkText({ + text: 'short veryverylongtoken', + mode: 'code', + ext: '.js', + context +}); + +const longGram = tri('veryverylongtoken', 3)[0]; +if (payload.chargrams.includes(longGram)) { + console.error('chargram guardrail test failed: long token chargrams should be skipped.'); + process.exit(1); +} + +const fieldPayload = tokenizeChunkText({ + text: 'short', + mode: 'code', + ext: '.js', + context, + chargramTokens: ['field'] +}); +const fieldGram = tri('field', 3)[0]; +if (!fieldPayload.chargrams.includes(fieldGram)) { + console.error('chargram guardrail test failed: field chargrams missing.'); + process.exit(1); +} +if (fieldPayload.chargrams.includes(tri('short', 3)[0])) { + console.error('chargram guardrail test failed: expected chargrams to use field tokens only.'); + process.exit(1); +} + +console.log('chargram guardrail test passed'); diff --git a/tests/chunk-meta-jsonl-cleanup.js b/tests/chunk-meta-jsonl-cleanup.js new file mode 100644 index 000000000..594440853 --- /dev/null +++ b/tests/chunk-meta-jsonl-cleanup.js @@ -0,0 +1,101 @@ +#!/usr/bin/env node + +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; + +import { + createChunkMetaIterator, + enqueueChunkMetaArtifacts +} from '../src/index/build/artifacts/writers/chunk-meta.js'; + +const root = process.cwd(); +const cacheRoot = path.join(root, 'tests', '.cache', 'chunk-meta-jsonl-cleanup'); +const outDir = path.join(cacheRoot, 'index'); + +await fsPromises.rm(cacheRoot, { recursive: true, force: true }); +await fsPromises.mkdir(outDir, { recursive: true }); + +const chunks = [ + { id: 0, file: 'alpha.js', start: 0, end: 10, startLine: 1, endLine: 1, kind: 'code' }, + { id: 1, file: 'beta.js', start: 0, end: 12, startLine: 1, endLine: 1, kind: 'code' }, + { id: 2, file: 'gamma.js', start: 0, end: 14, startLine: 1, endLine: 1, kind: 'code' } +]; + +const chunkMetaIterator = createChunkMetaIterator({ + chunks, + fileIdByPath: new Map(), + resolvedTokenMode: 'none', + tokenSampleSize: 0 +}); + +const runWriter = async (chunkMetaPlan) => { + const writes = []; + const enqueueWrite = (label, job) => { + writes.push({ label, job }); + }; + const 
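+  // Chunk meta must stream through enqueueWrite; a JSON-array fallback would be a regression, so this stub throws.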
enqueueJsonArray = (label, _payload, _options) => { + throw new Error(`Unexpected enqueueJsonArray for chunk meta (${label})`); + }; + const addPieceFile = () => {}; + const formatArtifactLabel = (value) => value; + + const state = { chunks }; + await enqueueChunkMetaArtifacts({ + state, + outDir, + chunkMetaIterator, + chunkMetaPlan, + enqueueJsonArray, + enqueueWrite, + addPieceFile, + formatArtifactLabel + }); + + for (const { label, job } of writes) { + try { + // eslint-disable-next-line no-await-in-loop + await job(); + } catch (err) { + throw new Error(`Failed write job (${label}): ${err?.message || err}`); + } + } +}; + +const metaPath = path.join(outDir, 'chunk_meta.meta.json'); +const partsDir = path.join(outDir, 'chunk_meta.parts'); +const jsonlPath = path.join(outDir, 'chunk_meta.jsonl'); + +await runWriter({ + chunkMetaUseJsonl: true, + chunkMetaUseShards: true, + chunkMetaShardSize: 1, + chunkMetaCount: chunks.length +}); + +if (!fs.existsSync(metaPath) || !fs.existsSync(partsDir)) { + console.error('Expected sharded chunk_meta artifacts (meta + parts).'); + process.exit(1); +} +if (fs.existsSync(jsonlPath)) { + console.error('Did not expect chunk_meta.jsonl when writing sharded chunk_meta.'); + process.exit(1); +} + +await runWriter({ + chunkMetaUseJsonl: true, + chunkMetaUseShards: false, + chunkMetaShardSize: 0, + chunkMetaCount: chunks.length +}); + +if (!fs.existsSync(jsonlPath)) { + console.error('Expected chunk_meta.jsonl when writing unsharded JSONL chunk_meta.'); + process.exit(1); +} +if (fs.existsSync(metaPath) || fs.existsSync(partsDir)) { + console.error('Expected stale sharded chunk_meta artifacts to be removed when writing unsharded JSONL.'); + process.exit(1); +} + +console.log('chunk_meta JSONL cleanup test passed'); diff --git a/tests/chunking-limits.js b/tests/chunking-limits.js new file mode 100644 index 000000000..ebbb79ef2 --- /dev/null +++ b/tests/chunking-limits.js @@ -0,0 +1,80 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { smartChunk } from '../src/index/chunking.js'; +import { buildMetaV2 } from '../src/index/metadata-v2.js'; + +const lineText = [ + 'alpha', + 'bravo', + 'charlie', + 'delta', + 'echo', + 'foxtrot', + 'golf' +].join('\n') + '\n'; +const lineContext = { chunking: { maxLines: 3 } }; + +const first = smartChunk({ + text: lineText, + ext: '.js', + relPath: 'src/sample.js', + mode: 'code', + context: lineContext +}); +const second = smartChunk({ + text: lineText, + ext: '.js', + relPath: 'src/sample.js', + mode: 'code', + context: lineContext +}); + +assert.ok(first.length > 1, 'expected line splitting'); + +const countLines = (value) => { + if (!value) return 0; + const trimmed = value.endsWith('\n') ? value.slice(0, -1) : value; + return trimmed ? 
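+  // An empty string counts as zero lines; otherwise count the newline-separated segments.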
trimmed.split('\n').length : 0; +}; + +for (const chunk of first) { + const slice = lineText.slice(chunk.start, chunk.end); + const lineCount = countLines(slice); + assert.ok(lineCount <= 3, `chunk line count ${lineCount} exceeds maxLines`); +} + +const segment = { + segmentId: 'seg-1', + type: 'code', + languageId: 'javascript', + parentSegmentId: null +}; +const toChunkId = (chunk) => buildMetaV2({ + chunk: { + ...chunk, + file: 'src/sample.js', + ext: '.js', + segment + }, + docmeta: {} +}).chunkId; + +assert.deepEqual(first.map(toChunkId), second.map(toChunkId), 'expected stable chunk IDs'); + +const byteText = 'abcdefghijABCDEFGHIJ'; +const byteChunks = smartChunk({ + text: byteText, + ext: '.txt', + relPath: 'notes.txt', + mode: 'code', + context: { chunking: { maxBytes: 7 } } +}); + +assert.ok(byteChunks.length > 1, 'expected byte splitting'); +for (const chunk of byteChunks) { + const slice = byteText.slice(chunk.start, chunk.end); + const bytes = Buffer.byteLength(slice, 'utf8'); + assert.ok(bytes <= 7, `chunk byte count ${bytes} exceeds maxBytes`); +} + +console.log('chunking limits test passed'); diff --git a/tests/chunking-sql-lua.js b/tests/chunking-sql-lua.js new file mode 100644 index 000000000..63d164acd --- /dev/null +++ b/tests/chunking-sql-lua.js @@ -0,0 +1,30 @@ +#!/usr/bin/env node +import { buildLuaChunks } from '../src/lang/lua.js'; +import { buildSqlChunks } from '../src/lang/sql.js'; + +const luaText = "local function foo(a)\n return a\nend -- done\n"; +const luaChunks = buildLuaChunks(luaText) || []; +if (!luaChunks.some((chunk) => chunk.name === 'foo')) { + console.error('Expected Lua chunk for foo when end has a trailing comment.'); + process.exit(1); +} + +const mysqlSql = "DELIMITER $$\nCREATE FUNCTION add_one(x INT)\nRETURNS INT\nBEGIN\nSELECT x + 1;\nEND $$\nDELIMITER ;\nSELECT 1;"; +const mysqlChunks = buildSqlChunks(mysqlSql, { dialect: 'mysql' }) || []; +if (mysqlChunks.length !== 2) { + console.error(`Expected 2 MySQL statements, got ${mysqlChunks.length}.`); + process.exit(1); +} +if (mysqlChunks[0].kind !== 'FunctionDeclaration') { + console.error('Expected first MySQL chunk to be a FunctionDeclaration.'); + process.exit(1); +} + +const pgSql = "CREATE FUNCTION test_fn() RETURNS text AS $$\nSELECT ';';\n$$ LANGUAGE sql;\nSELECT 2;"; +const pgChunks = buildSqlChunks(pgSql, { dialect: 'postgres' }) || []; +if (pgChunks.length !== 2) { + console.error(`Expected 2 Postgres statements, got ${pgChunks.length}.`); + process.exit(1); +} + +console.log('sql/lua chunking test passed'); diff --git a/tests/chunking-yaml.js b/tests/chunking-yaml.js new file mode 100644 index 000000000..068ac7ce9 --- /dev/null +++ b/tests/chunking-yaml.js @@ -0,0 +1,53 @@ +#!/usr/bin/env node +import { smartChunk } from '../src/index/chunking.js'; + +const text = "alpha: 1\nbeta: 2\n"; +const defaultChunks = smartChunk({ + text, + ext: '.yaml', + relPath: 'config.yaml', + mode: 'code' +}); +if (defaultChunks.length !== 1 || defaultChunks[0].name !== 'root') { + console.error('Expected default YAML chunking to return a root chunk.'); + process.exit(1); +} + +const top = smartChunk({ + text, + ext: '.yaml', + relPath: 'config.yaml', + mode: 'code', + context: { yamlChunking: { mode: 'top-level' } } +}); +const topNames = top.map((chunk) => chunk.name); +if (top.length !== 2 || !topNames.includes('alpha') || !topNames.includes('beta')) { + console.error(`Unexpected top-level YAML chunks: ${topNames.join(',')}`); + process.exit(1); +} + +const rootOnly = smartChunk({ + text, + ext: 
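+  // Same document as above; root mode must collapse it into a single root chunk.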
'.yaml', + relPath: 'config.yaml', + mode: 'code', + context: { yamlChunking: { mode: 'root' } } +}); +if (rootOnly.length !== 1 || rootOnly[0].name !== 'root') { + console.error('Expected root-only YAML chunking.'); + process.exit(1); +} + +const autoLarge = smartChunk({ + text, + ext: '.yaml', + relPath: 'config.yaml', + mode: 'code', + context: { yamlChunking: { mode: 'auto', maxBytes: 4 } } +}); +if (autoLarge.length !== 1 || autoLarge[0].name !== 'root') { + console.error('Expected auto YAML chunking to fall back to root.'); + process.exit(1); +} + +console.log('yaml chunking test passed'); diff --git a/tests/chunking/json.test.js b/tests/chunking/json.test.js new file mode 100644 index 000000000..6ad5c5da8 --- /dev/null +++ b/tests/chunking/json.test.js @@ -0,0 +1,30 @@ +#!/usr/bin/env node +import { chunkJson } from '../../src/index/chunking.js'; + +const expect = (condition, message) => { + if (!condition) { + console.error(message); + process.exit(1); + } +}; + +const jsonText = JSON.stringify({ + name: 'alpha', + config: { enabled: true }, + text: 'escaped \"quote\"' +}); + +const chunks = chunkJson(jsonText, {}) || []; +const names = new Set(chunks.map((chunk) => chunk.name)); +expect(names.has('name'), 'Missing chunk for name key.'); +expect(names.has('config'), 'Missing chunk for config key.'); +expect(names.has('text'), 'Missing chunk for text key.'); + +const arrayChunk = chunkJson('["a","b"]', {}) || []; +expect(arrayChunk.length === 1, 'Expected array JSON to return a single chunk.'); +expect(arrayChunk[0].name === 'root', 'Expected root chunk for array JSON.'); + +const invalid = chunkJson('{', {}); +expect(invalid === null, 'Expected invalid JSON to return null.'); + +console.log('Chunking JSON test passed.'); diff --git a/tests/chunking/limits.test.js b/tests/chunking/limits.test.js new file mode 100644 index 000000000..cac52fe95 --- /dev/null +++ b/tests/chunking/limits.test.js @@ -0,0 +1,40 @@ +#!/usr/bin/env node +import { applyChunkingLimits } from '../../src/index/chunking/limits.js'; + +const expect = (condition, message) => { + if (!condition) { + console.error(message); + process.exit(1); + } +}; + +const lineText = [ + 'alpha', + 'bravo', + 'charlie', + 'delta' +].join('\n'); + +const baseChunk = { start: 0, end: lineText.length, name: 'root', kind: 'Section', meta: {} }; +const lineChunks = applyChunkingLimits([baseChunk], lineText, { chunking: { maxLines: 2 } }); + +expect(lineChunks.length === 2, `Expected 2 chunks for maxLines, got ${lineChunks.length}`); +lineChunks.forEach((chunk) => { + expect(chunk.meta?.startLine >= 1, 'Expected startLine in chunk meta.'); + expect(chunk.meta?.endLine >= chunk.meta?.startLine, 'Expected endLine >= startLine.'); +}); + +const byteText = 'abcdefghij'; +const byteChunks = applyChunkingLimits( + [{ start: 0, end: byteText.length, name: 'root', kind: 'Section', meta: {} }], + byteText, + { chunking: { maxBytes: 4 } } +); + +expect(byteChunks.length >= 3, `Expected multiple chunks for maxBytes, got ${byteChunks.length}`); +byteChunks.forEach((chunk) => { + const slice = byteText.slice(chunk.start, chunk.end); + expect(Buffer.byteLength(slice, 'utf8') <= 4, 'Chunk exceeded maxBytes.'); +}); + +console.log('Chunking limits test passed.'); diff --git a/tests/chunking/yaml.test.js b/tests/chunking/yaml.test.js new file mode 100644 index 000000000..2ef1a5349 --- /dev/null +++ b/tests/chunking/yaml.test.js @@ -0,0 +1,42 @@ +#!/usr/bin/env node +import { chunkYaml } from '../../src/index/chunking.js'; + +const expect = (condition, 
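+  // Minimal assert helper: log the message and exit non-zero when the condition fails.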
message) => { + if (!condition) { + console.error(message); + process.exit(1); + } +}; + +const yamlText = [ + 'defaults: &defaults', + ' name: base', + 'service:', + ' <<: *defaults', + ' port: 80' +].join('\n'); + +const topLevel = chunkYaml(yamlText, 'config.yml', { + yamlChunking: { mode: 'top-level', maxBytes: 1024 } +}) || []; + +const names = new Set(topLevel.map((chunk) => chunk.name)); +expect(names.has('defaults'), 'Missing top-level chunk for defaults.'); +expect(names.has('service'), 'Missing top-level chunk for service.'); + +const rootOnly = chunkYaml(yamlText, 'config.yml', { yamlChunking: { mode: 'root' } }) || []; +expect(rootOnly.length === 1, `Expected root mode to return 1 chunk, got ${rootOnly.length}`); +expect(rootOnly[0].name === 'root', 'Expected root chunk name.'); + +const multiDoc = [ + '---', + 'first: 1', + '---', + 'second: 2' +].join('\n'); +const multiChunks = chunkYaml(multiDoc, 'config.yml', { yamlChunking: { mode: 'top-level' } }) || []; +const multiNames = new Set(multiChunks.map((chunk) => chunk.name)); +expect(multiNames.has('first'), 'Missing first doc chunk.'); +expect(multiNames.has('second'), 'Missing second doc chunk.'); + +console.log('Chunking YAML test passed.'); diff --git a/tests/churn-filter.js b/tests/churn-filter.js index 81d1e3312..878ea27f2 100644 --- a/tests/churn-filter.js +++ b/tests/churn-filter.js @@ -3,7 +3,7 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; import { spawnSync } from 'node:child_process'; -import { getGitMeta } from '../src/indexer/git.js'; +import { getGitMeta } from '../src/index/git.js'; const root = process.cwd(); const tempRoot = path.join(root, 'tests', '.cache', 'churn-filter'); @@ -64,10 +64,7 @@ const env = { }; const repoArgs = ['--repo', repoRoot]; -const originalCwd = process.cwd(); -process.chdir(repoRoot); -const gitMeta = await getGitMeta('notes.md', 0, 1, { blame: false }); -process.chdir(originalCwd); +const gitMeta = await getGitMeta('notes.md', 1, 2, { blame: false, baseDir: repoRoot }); const expectedChurn = 5; if (gitMeta.churn !== expectedChurn) { console.error(`Expected churn ${expectedChurn}, got ${gitMeta.churn}`); diff --git a/tests/clean-artifacts.js b/tests/clean-artifacts.js index 7e73e56f7..0d240cc59 100644 --- a/tests/clean-artifacts.js +++ b/tests/clean-artifacts.js @@ -53,7 +53,7 @@ await fsPromises.writeFile(path.join(extensionsDir, 'ext.bin'), 'ext'); const result = spawnSync( process.execPath, - [path.join(root, 'tools', 'clean-artifacts.js')], + [path.join(root, 'tools', 'clean-artifacts.js'), '--repo', repoRoot], { cwd: repoRoot, env, stdio: 'inherit' } ); @@ -74,7 +74,7 @@ await fsPromises.writeFile(path.join(repoCacheRoot, 'marker.txt'), 'marker'); const resultAll = spawnSync( process.execPath, - [path.join(root, 'tools', 'clean-artifacts.js'), '--all'], + [path.join(root, 'tools', 'clean-artifacts.js'), '--repo', repoRoot, '--all'], { cwd: repoRoot, env, stdio: 'inherit' } ); diff --git a/tests/clike-doc-comments.js b/tests/clike-doc-comments.js new file mode 100644 index 000000000..0e07ab7de --- /dev/null +++ b/tests/clike-doc-comments.js @@ -0,0 +1,43 @@ +#!/usr/bin/env node +import { buildCLikeChunks } from '../src/lang/clike.js'; + +const expect = (condition, message) => { + if (!condition) { + console.error(message); + process.exit(1); + } +}; + +const cText = [ + '/**', + ' * Greets the user.', + ' */', + 'int greet(int x) {', + ' return x;', + '}' +].join('\n'); + +const cChunks = buildCLikeChunks(cText, '.c', { 
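+  // Disable tree-sitter to exercise the heuristic C-like parser, with a no-op logger.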
treeSitter: { enabled: false }, log: () => {} }) || []; +const greetChunk = cChunks.find((chunk) => chunk.kind === 'FunctionDeclaration' && chunk.name === 'greet'); +expect(!!greetChunk, 'Expected to find a C-like function chunk for greet.'); +expect( + String(greetChunk.meta?.docstring || '').includes('Greets the user'), + `Expected greet docstring to include "Greets the user", got: ${JSON.stringify(greetChunk.meta?.docstring || '')}` +); + +const objcText = [ + '@interface Widget : NSObject', + '/// Greets from ObjC.', + '- (void)greet;', + '@end' +].join('\n'); + +const objcChunks = buildCLikeChunks(objcText, '.m', { treeSitter: { enabled: false }, log: () => {} }) || []; +const objcGreet = objcChunks.find((chunk) => chunk.kind === 'MethodDeclaration' && String(chunk.name || '').includes('greet')); +expect(!!objcGreet, 'Expected to find an ObjC method chunk for greet.'); +expect( + String(objcGreet.meta?.docstring || '').includes('Greets from ObjC'), + `Expected ObjC greet docstring to include "Greets from ObjC", got: ${JSON.stringify(objcGreet.meta?.docstring || '')}` +); + +console.log('C-like doc comment extraction test passed.'); diff --git a/tests/code-map-basic.js b/tests/code-map-basic.js new file mode 100644 index 000000000..f099d74b5 --- /dev/null +++ b/tests/code-map-basic.js @@ -0,0 +1,112 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'code-map-basic'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const config = { + indexing: { + astDataflow: true, + controlFlow: true, + typeInference: true, + typeInferenceCrossFile: true + } +}; + +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify(config, null, 2) +); + +await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true }); +await fsPromises.writeFile( + path.join(repoRoot, 'src', 'util.js'), + 'export function add(a, b) { return a + b; }\n' + + 'export function mutate(obj) { obj.count = obj.count + 1; return obj; }\n' +); +await fsPromises.writeFile( + path.join(repoRoot, 'src', 'main.js'), + 'import { add, mutate } from "./util.js";\n' + + 'function run(x) {\n' + + ' if (x > 0) { return add(x, 1); }\n' + + ' return add(x, 2);\n' + + '}\n' + + 'async function go(items) {\n' + + ' for (const item of items) {\n' + + ' await Promise.resolve(item);\n' + + ' mutate(item);\n' + + ' }\n' + + '}\n' + + 'export default function main(items) { return go(items); }\n' +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); + +if (buildResult.status !== 0) { + console.error('Failed: build index for code map basic test'); + process.exit(buildResult.status ?? 
1); +} + +const mapResult = spawnSync( + process.execPath, + [path.join(root, 'tools', 'report-code-map.js'), '--format', 'json', '--repo', repoRoot], + { cwd: repoRoot, env, encoding: 'utf8' } +); + +if (mapResult.status !== 0) { + console.error('Failed: map generator'); + if (mapResult.stderr) console.error(mapResult.stderr.trim()); + process.exit(mapResult.status ?? 1); +} + +let payload = null; +try { + payload = JSON.parse(mapResult.stdout || '{}'); +} catch { + console.error('Failed: map output invalid JSON'); + process.exit(1); +} + +if (!Array.isArray(payload.nodes) || payload.nodes.length === 0) { + console.error('Failed: map nodes missing'); + process.exit(1); +} + +const members = payload.nodes.flatMap((node) => node.members || []); +if (!members.length) { + console.error('Failed: map members missing'); + process.exit(1); +} + +const hasControlFlow = members.some((member) => member.controlFlow); +const hasDataflow = members.some((member) => member.dataflow); +if (!hasControlFlow || !hasDataflow) { + console.error('Failed: expected dataflow/controlFlow metadata'); + process.exit(1); +} + +const edgeTypes = new Set(payload.edges.map((edge) => edge.type)); +if (!edgeTypes.has('import') || !edgeTypes.has('call')) { + console.error('Failed: expected import + call edges'); + process.exit(1); +} + +console.log('code map basic tests passed'); diff --git a/tests/code-map-default-guardrails.js b/tests/code-map-default-guardrails.js new file mode 100644 index 000000000..859a54d83 --- /dev/null +++ b/tests/code-map-default-guardrails.js @@ -0,0 +1,84 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { DEFAULT_LIMITS } from '../src/map/constants.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'code-map-default-guardrails'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true }); + +const functionCount = DEFAULT_LIMITS.maxMembersPerFile + 15; +let source = ''; +for (let i = 0; i < functionCount; i += 1) { + source += `export function fn${i}(value) { return value + ${i}; }\n`; +} + +await fsPromises.writeFile(path.join(repoRoot, 'src', 'many.js'), source); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); + +if (buildResult.status !== 0) { + console.error('Failed: build index for code map default guardrails test'); + process.exit(buildResult.status ?? 1); +} + +const mapResult = spawnSync( + process.execPath, + [path.join(root, 'tools', 'report-code-map.js'), '--format', 'json', '--repo', repoRoot], + { cwd: repoRoot, env, encoding: 'utf8' } +); + +if (mapResult.status !== 0) { + console.error('Failed: report-code-map for default guardrails test'); + if (mapResult.stderr) console.error(mapResult.stderr.trim()); + process.exit(mapResult.status ?? 
1); +} + +let model = null; +try { + model = JSON.parse(mapResult.stdout || '{}'); +} catch { + console.error('Failed: map output invalid JSON (default guardrails test)'); + process.exit(1); +} + +const fileNode = (model.nodes || []).find((node) => node?.path === 'src/many.js'); +if (!fileNode) { + console.error('Failed: map missing src/many.js node (default guardrails test)'); + process.exit(1); +} + +const members = Array.isArray(fileNode.members) ? fileNode.members : []; +if (members.length > DEFAULT_LIMITS.maxMembersPerFile) { + console.error( + `Failed: expected members <= ${DEFAULT_LIMITS.maxMembersPerFile} but saw ${members.length}` + ); + process.exit(1); +} + +const droppedMembers = model?.summary?.dropped?.members ?? 0; +const truncated = model?.summary?.truncated === true; +if (!truncated || droppedMembers <= 0) { + console.error('Failed: expected map summary to indicate truncation (default guardrails test)'); + process.exit(1); +} + +console.log('code map default guardrails tests passed'); diff --git a/tests/code-map-determinism.js b/tests/code-map-determinism.js new file mode 100644 index 000000000..e05c42983 --- /dev/null +++ b/tests/code-map-determinism.js @@ -0,0 +1,76 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'code-map-determinism'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify({ indexing: { astDataflow: true, controlFlow: true } }, null, 2) +); + +await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true }); +await fsPromises.writeFile( + path.join(repoRoot, 'src', 'one.js'), + 'export function alpha() { return 1; }\n' +); +await fsPromises.writeFile( + path.join(repoRoot, 'src', 'two.js'), + 'import { alpha } from "./one.js";\nexport function beta() { return alpha(); }\n' +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); + +if (buildResult.status !== 0) { + console.error('Failed: build index for determinism test'); + process.exit(buildResult.status ?? 
1); +} + +const runMap = () => spawnSync( + process.execPath, + [path.join(root, 'tools', 'report-code-map.js'), '--format', 'json', '--repo', repoRoot], + { cwd: repoRoot, env, encoding: 'utf8' } +); + +const first = runMap(); +const second = runMap(); + +if (first.status !== 0 || second.status !== 0) { + console.error('Failed: map generator runs'); + process.exit(1); +} + +const strip = (payload) => { + const clone = JSON.parse(JSON.stringify(payload)); + clone.generatedAt = null; + if (clone.summary) clone.summary.generatedAt = null; + return clone; +}; + +const firstPayload = strip(JSON.parse(first.stdout || '{}')); +const secondPayload = strip(JSON.parse(second.stdout || '{}')); + +if (JSON.stringify(firstPayload) !== JSON.stringify(secondPayload)) { + console.error('Failed: map output not deterministic'); + process.exit(1); +} + +console.log('code map determinism tests passed'); diff --git a/tests/code-map-dot.js b/tests/code-map-dot.js new file mode 100644 index 000000000..8d3b5d63a --- /dev/null +++ b/tests/code-map-dot.js @@ -0,0 +1,73 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'code-map-dot'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify({ indexing: { astDataflow: true, controlFlow: true } }, null, 2) +); + +await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true }); +await fsPromises.writeFile( + path.join(repoRoot, 'src', 'a.js'), + 'import { add } from "./b.js";\n' + + 'export function run(x) { return add(x, 1); }\n' +); +await fsPromises.writeFile( + path.join(repoRoot, 'src', 'b.js'), + 'export function add(a, b) { return a + b; }\n' +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); + +if (buildResult.status !== 0) { + console.error('Failed: build index for code map dot test'); + process.exit(buildResult.status ?? 1); +} + +const mapResult = spawnSync( + process.execPath, + [path.join(root, 'tools', 'report-code-map.js'), '--format', 'dot', '--repo', repoRoot], + { cwd: repoRoot, env, encoding: 'utf8' } +); + +if (mapResult.status !== 0) { + console.error('Failed: map dot output'); + process.exit(mapResult.status ?? 
1);
+}
+
+const output = mapResult.stdout || '';
+if (!output.includes('PORT=')) {
+  console.error('Failed: dot output missing ports');
+  process.exit(1);
+}
+if (!output.includes('->')) {
+  console.error('Failed: dot output missing edges');
+  process.exit(1);
+}
+if (!output.includes('style="dashed"')) {
+  console.error('Failed: dot output missing import style');
+  process.exit(1);
+}
+
+console.log('code map dot tests passed');
diff --git a/tests/code-map-graphviz-available.js b/tests/code-map-graphviz-available.js
new file mode 100644
index 000000000..b327d8fda
--- /dev/null
+++ b/tests/code-map-graphviz-available.js
@@ -0,0 +1,120 @@
+#!/usr/bin/env node
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+
+const root = process.cwd();
+
+const dotCheck = spawnSync('dot', ['-V'], { encoding: 'utf8' });
+if (dotCheck.status !== 0) {
+  console.log('code map graphviz available test skipped (dot not found)');
+  process.exit(0);
+}
+
+const tempRoot = path.join(root, 'tests', '.cache', 'code-map-graphviz-available');
+const repoRoot = path.join(tempRoot, 'repo');
+const cacheRoot = path.join(tempRoot, 'cache');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(repoRoot, { recursive: true });
+await fsPromises.mkdir(cacheRoot, { recursive: true });
+
+await fsPromises.writeFile(
+  path.join(repoRoot, '.pairofcleats.json'),
+  JSON.stringify({ indexing: { astDataflow: true, controlFlow: true } }, null, 2)
+);
+
+await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true });
+await fsPromises.writeFile(
+  path.join(repoRoot, 'src', 'a.js'),
+  'import { add } from "./b.js";\n'
+    + 'export function run(x) { return add(x, 1); }\n'
+);
+await fsPromises.writeFile(
+  path.join(repoRoot, 'src', 'b.js'),
+  'export function add(a, b) { return a + b; }\n'
+);
+
+const env = {
+  ...process.env,
+  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
+  PAIROFCLEATS_EMBEDDINGS: 'stub'
+};
+
+const buildResult = spawnSync(
+  process.execPath,
+  [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot],
+  { cwd: repoRoot, env, stdio: 'inherit' }
+);
+
+if (buildResult.status !== 0) {
+  console.error('Failed: build index for code map graphviz available test');
+  process.exit(buildResult.status ?? 1);
+}
+
+// Verify stdout rendering.
+const mapStdoutResult = spawnSync(
+  process.execPath,
+  [path.join(root, 'tools', 'report-code-map.js'), '--format', 'svg', '--repo', repoRoot],
+  { cwd: repoRoot, env, encoding: 'utf8' }
+);
+
+if (mapStdoutResult.status !== 0) {
+  console.error('Failed: map svg output (stdout)');
+  if (mapStdoutResult.stderr) console.error(mapStdoutResult.stderr.trim());
+  process.exit(mapStdoutResult.status ?? 1);
+}
+
+const stdoutSvg = (mapStdoutResult.stdout || '').trim();
+if (!stdoutSvg.includes('<svg')) {
+  console.error('Failed: svg stdout output missing <svg>');
+  process.exit(1);
+}
+
+// Verify file output through --out + --json.
+const outPath = path.join(tempRoot, 'map.svg');
+const mapFileResult = spawnSync(
+  process.execPath,
+  [
+    path.join(root, 'tools', 'report-code-map.js'),
+    '--format',
+    'svg',
+    '--out',
+    outPath,
+    '--json',
+    '--repo',
+    repoRoot
+  ],
+  { cwd: repoRoot, env, encoding: 'utf8' }
+);
+
+if (mapFileResult.status !== 0) {
+  console.error('Failed: map svg output (file)');
+  if (mapFileResult.stderr) console.error(mapFileResult.stderr.trim());
+  process.exit(mapFileResult.status ??
+let report = null;
+try {
+  report = JSON.parse(mapFileResult.stdout || '{}');
+} catch {
+  console.error('Failed: svg --json output invalid JSON');
+  process.exit(1);
+}
+
+if (report.format !== 'svg') {
+  console.error(`Failed: expected format svg but saw ${report.format}`);
+  process.exit(1);
+}
+if (!report.outPath) {
+  console.error('Failed: svg report missing outPath');
+  process.exit(1);
+}
+
+const fileSvg = (await fsPromises.readFile(report.outPath, 'utf8')).trim();
+if (!fileSvg.includes('<svg')) {
+  console.error('Failed: svg file missing <svg>');
+  process.exit(1);
+}
+
+console.log('code map graphviz available tests passed');
diff --git a/tests/code-map-graphviz-fallback.js b/tests/code-map-graphviz-fallback.js
new file mode 100644
index 000000000..d288c225b
--- /dev/null
+++ b/tests/code-map-graphviz-fallback.js
@@ -0,0 +1,75 @@
+#!/usr/bin/env node
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+
+const root = process.cwd();
+const tempRoot = path.join(root, 'tests', '.cache', 'code-map-graphviz');
+const repoRoot = path.join(tempRoot, 'repo');
+const cacheRoot = path.join(tempRoot, 'cache');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(repoRoot, { recursive: true });
+await fsPromises.mkdir(cacheRoot, { recursive: true });
+await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true });
+
+await fsPromises.writeFile(
+  path.join(repoRoot, 'src', 'a.js'),
+  'export function alpha() { return 1; }\n'
+);
+
+const env = {
+  ...process.env,
+  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
+  PAIROFCLEATS_EMBEDDINGS: 'stub'
+};
+
+const buildResult = spawnSync(
+  process.execPath,
+  [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot],
+  { cwd: repoRoot, env, stdio: 'inherit' }
+);
+
+if (buildResult.status !== 0) {
+  console.error('Failed: build index for graphviz fallback test');
+  process.exit(buildResult.status ?? 1);
+}
+
+const outPath = path.join(tempRoot, 'map.svg');
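+// Run the tool with an emptied PATH so the dot binary cannot be resolved and the DOT fallback engages.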
+const mapResult = spawnSync(
+  process.execPath,
+  [
+    path.join(root, 'tools', 'report-code-map.js'),
+    '--format', 'svg',
+    '--repo', repoRoot,
+    '--out', outPath,
+    '--json'
+  ],
+  {
+    cwd: repoRoot,
+    env: {
+      ...env,
+      PATH: '',
+      Path: ''
+    },
+    encoding: 'utf8'
+  }
+);
+
+if (mapResult.status !== 0) {
+  console.error('Failed: graphviz fallback map output');
+  process.exit(mapResult.status ?? 1);
+}
+
+const payload = JSON.parse(mapResult.stdout || '{}');
+if (payload.format !== 'dot') {
+  console.error('Failed: expected dot fallback');
+  process.exit(1);
+}
+if (!payload.outPath || !payload.outPath.endsWith('.dot')) {
+  console.error('Failed: expected .dot output path');
+  process.exit(1);
+}
+
+console.log('code map graphviz fallback tests passed');
diff --git a/tests/code-map-guardrails.js b/tests/code-map-guardrails.js
new file mode 100644
index 000000000..de758ffc0
--- /dev/null
+++ b/tests/code-map-guardrails.js
@@ -0,0 +1,70 @@
+#!/usr/bin/env node
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+
+const root = process.cwd();
+const tempRoot = path.join(root, 'tests', '.cache', 'code-map-guardrails');
+const repoRoot = path.join(tempRoot, 'repo');
+const cacheRoot = path.join(tempRoot, 'cache');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(repoRoot, { recursive: true });
+await fsPromises.mkdir(cacheRoot, { recursive: true });
+await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true });
+
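+// Generate far more members than the guardrail caps below allow so truncation must kick in.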
+const funcs = [];
+for (let i = 0; i < 120; i += 1) {
+  funcs.push(`export function fn${i}() { return ${i}; }`);
+}
+await fsPromises.writeFile(path.join(repoRoot, 'src', 'many.js'), funcs.join('\n'));
+
+const env = {
+  ...process.env,
+  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
+  PAIROFCLEATS_EMBEDDINGS: 'stub'
+};
+
+const buildResult = spawnSync(
+  process.execPath,
+  [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot],
+  { cwd: repoRoot, env, stdio: 'inherit' }
+);
+
+if (buildResult.status !== 0) {
+  console.error('Failed: build index for guardrails test');
+  process.exit(buildResult.status ?? 1);
+}
+
+const mapResult = spawnSync(
+  process.execPath,
+  [
+    path.join(root, 'tools', 'report-code-map.js'),
+    '--format', 'json',
+    '--repo', repoRoot,
+    '--max-members-per-file', '5',
+    '--max-files', '1',
+    '--max-edges', '2'
+  ],
+  { cwd: repoRoot, env, encoding: 'utf8' }
+);
+
+if (mapResult.status !== 0) {
+  console.error('Failed: guardrails map output');
+  process.exit(mapResult.status ?? 1);
+}
+
+const payload = JSON.parse(mapResult.stdout || '{}');
+const summary = payload.summary || {};
+const dropped = summary.dropped || {};
+if (!summary.truncated) {
+  console.error('Failed: guardrails did not truncate');
+  process.exit(1);
+}
+if (!dropped.members || dropped.members < 1) {
+  console.error('Failed: guardrails did not drop members');
+  process.exit(1);
+}
+
+console.log('code map guardrails tests passed');
diff --git a/tests/compact-pieces.js b/tests/compact-pieces.js
new file mode 100644
index 000000000..927a63593
--- /dev/null
+++ b/tests/compact-pieces.js
@@ -0,0 +1,100 @@
+#!/usr/bin/env node
+import fs from 'node:fs';
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js';
+
+const root = process.cwd();
+const tempRoot = path.join(root, 'tests', '.cache', 'compact-pieces');
+const repoRoot = path.join(tempRoot, 'repo');
+const cacheRoot = path.join(tempRoot, 'cache');
+const buildIndexPath = path.join(root, 'build_index.js');
+const compactPiecesPath = path.join(root, 'tools', 'compact-pieces.js');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(repoRoot, { recursive: true });
+await fsPromises.mkdir(cacheRoot, { recursive: true });
+
+await fsPromises.writeFile(path.join(repoRoot, 'alpha.js'), 'const alpha = 1;\n');
+await fsPromises.writeFile(path.join(repoRoot, 'beta.js'), 'const beta = 2;\n');
+await fsPromises.writeFile(
+  path.join(repoRoot, '.pairofcleats.json'),
+  JSON.stringify({
+    indexing: {
+      treeSitter: { enabled: false },
+      artifacts: {
+        chunkMetaFormat: 'jsonl',
+        chunkMetaShardSize: 1,
+        tokenPostingsFormat: 'sharded',
+        tokenPostingsShardSize: 1
+      }
+    }
+  }, null, 2)
+);
+
+const env = {
+  ...process.env,
+  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
+  PAIROFCLEATS_EMBEDDINGS: 'stub'
+};
+
+const runNode = (label, args, cwd = repoRoot) => {
+  const result = spawnSync(process.execPath, args, { cwd, env, stdio: 'inherit' });
+  if (result.status !== 0) {
+    console.error(`Failed: ${label}`);
+    process.exit(result.status ?? 1);
+  }
+};
+
+runNode('build_index', [buildIndexPath, '--stub-embeddings', '--mode', 'code', '--repo', repoRoot]);
+
+const userConfig = loadUserConfig(repoRoot);
+const previousCacheRoot = process.env.PAIROFCLEATS_CACHE_ROOT;
+process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot;
+const indexDir = getIndexDir(repoRoot, 'code', userConfig);
+if (previousCacheRoot === undefined) {
+  delete process.env.PAIROFCLEATS_CACHE_ROOT;
+} else {
+  process.env.PAIROFCLEATS_CACHE_ROOT = previousCacheRoot;
+}
+const chunkPartsDir = path.join(indexDir, 'chunk_meta.parts');
+const tokenPartsDir = path.join(indexDir, 'token_postings.shards');
+const beforeChunkParts = fs.existsSync(chunkPartsDir) ? fs.readdirSync(chunkPartsDir).length : 0;
+const beforeTokenParts = fs.existsSync(tokenPartsDir) ? fs.readdirSync(tokenPartsDir).length : 0;
+if (beforeChunkParts < 2) {
+  console.error('Expected multiple chunk_meta parts before compaction.');
+  process.exit(1);
+}
+
+runNode('compact-pieces', [
+  compactPiecesPath,
+  '--repo',
+  repoRoot,
+  '--mode',
+  'code',
+  '--chunk-meta-size',
+  '10',
+  '--token-postings-size',
+  '10'
+]);
+
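+// Shard size 1 above forces many small parts, so compaction should consolidate them.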
+const afterChunkParts = fs.existsSync(chunkPartsDir) ? fs.readdirSync(chunkPartsDir).length : 0;
+const afterTokenParts = fs.existsSync(tokenPartsDir) ? fs.readdirSync(tokenPartsDir).length : 0;
+if (afterChunkParts >= beforeChunkParts) {
+  console.error('Expected chunk_meta parts to shrink after compaction.');
+  process.exit(1);
+}
+if (beforeTokenParts >= 2 && afterTokenParts >= beforeTokenParts) {
+  console.error('Expected token_postings shards to shrink after compaction.');
+  process.exit(1);
+}
+
+const logPath = path.join(indexDir, 'pieces', 'compaction.log');
+if (!fs.existsSync(logPath)) {
+  console.error(`Expected compaction log at ${logPath}`);
+  process.exit(1);
+}
+
+console.log('compact pieces test passed');
diff --git a/tests/compare-models.js b/tests/compare-models.js
deleted file mode 100644
index 26e1681e2..000000000
--- a/tests/compare-models.js
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/usr/bin/env node
-import fs from 'node:fs';
-import fsPromises from 'node:fs/promises';
-import path from 'node:path';
-import { spawnSync } from 'node:child_process';
-
-const root = process.cwd();
-const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample');
-const tempRoot = path.join(root, 'tests', '.cache', 'compare-models');
-const cacheRoot = path.join(tempRoot, 'cache');
-
-await fsPromises.rm(tempRoot, { recursive: true, force: true });
-await fsPromises.mkdir(cacheRoot, { recursive: true });
-
-const env = {
-  ...process.env,
-  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
-  PAIROFCLEATS_EMBEDDINGS: 'stub'
-};
-
-const models = [
-  'Xenova/all-MiniLM-L12-v2',
-  'Xenova/all-MiniLM-L6-v2'
-];
-
-const result = spawnSync(
-  process.execPath,
-  [
-    path.join(root, 'tools', 'compare-models.js'),
-    '--models',
-    models.join(','),
-    '--build',
-    '--stub-embeddings',
-    '--no-ann',
-    '--limit',
-    '2',
-    '--json'
-  ],
-  { cwd: fixtureRoot, env, encoding: 'utf8' }
-);
-
-if (result.status !== 0) {
-  console.error('compare models test failed: script error.');
-  if (result.stderr) console.error(result.stderr.trim());
-  process.exit(result.status ?? 1);
-}
-
-const payload = JSON.parse(result.stdout || '{}');
-if (!payload.summary || !payload.settings || !payload.results) {
-  console.error('compare models test failed: missing fields.');
-  process.exit(1);
-}
-if (!Array.isArray(payload.settings.models) || payload.settings.models.length < 2) {
-  console.error('compare models test failed: models missing.');
-  process.exit(1);
-}
-if (!payload.summary.models || !payload.summary.comparisons) {
-  console.error('compare models test failed: summary missing.');
-  process.exit(1);
-}
-
-console.log('compare models test passed');
diff --git a/tests/config-dump.js b/tests/config-dump.js
new file mode 100644
index 000000000..309ef6977
--- /dev/null
+++ b/tests/config-dump.js
@@ -0,0 +1,19 @@
+#!/usr/bin/env node
+import path from 'node:path';
+import { fileURLToPath } from 'node:url';
+import { spawnSync } from 'node:child_process';
+
+const root = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..');
+const scriptPath = path.join(root, 'tools', 'config-dump.js');
+const result = spawnSync(process.execPath, [scriptPath, '--json'], { encoding: 'utf8', cwd: root });
+if (result.status !== 0) {
+  throw new Error(`config-dump failed: ${result.stderr || result.stdout}`);
+}
+const payload = JSON.parse(result.stdout || '{}');
+if (!payload.repoRoot) {
+  throw new Error('config-dump did not report repoRoot.');
+}
+if (!payload.derived || !payload.derived.cacheRoot) {
+  throw new Error('config-dump did not include derived cacheRoot.');
+}
+console.log('Config dump test passed');
diff --git a/tests/config-validate.js b/tests/config-validate.js
index 349208dc8..dca333af9 100644
--- a/tests/config-validate.js
+++ b/tests/config-validate.js
@@ -14,7 +14,7 @@ const invalidPath = path.join(cacheRoot, 'invalid.json');
 
 await fsPromises.writeFile(
   validPath,
-  JSON.stringify({ search: { annDefault: true }, sqlite: { use: true } }, null, 2)
+  JSON.stringify({ search: { annDefault: true }, sqlite: { use: true }, runtime: { uvThreadpoolSize: 8 } }, null, 2)
 );
 await fsPromises.writeFile(
   invalidPath,
diff --git a/tests/context-expansion.js b/tests/context-expansion.js
new file mode 100644
index 000000000..78733b07e
--- /dev/null
+++ b/tests/context-expansion.js
@@ -0,0 +1,36 @@
+#!/usr/bin/env node
+import { expandContext } from '../src/retrieval/context-expansion.js';
+
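+// Minimal relation fixture: alpha calls beta, and src/a.js imports src/c.js.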
+const chunkMeta = [
+  { id: 0, file: 'src/a.js', name: 'alpha', codeRelations: { calls: [['alpha', 'beta']] } },
+  { id: 1, file: 'src/b.js', name: 'beta' },
+  { id: 2, file: 'src/c.js', name: 'gamma' }
+];
+
+const fileRelations = new Map([
+  ['src/a.js', { importLinks: ['src/c.js'], usages: ['beta'], exports: [] }]
+]);
+
+const hits = [{ id: 0, file: 'src/a.js' }];
+const contextHits = expandContext({
+  hits,
+  chunkMeta,
+  fileRelations,
+  repoMap: null,
+  options: {
+    maxPerHit: 5,
+    maxTotal: 10,
+    includeCalls: true,
+    includeImports: true,
+    includeUsages: true
+  }
+});
+
+const ids = new Set(contextHits.map((hit) => hit.id));
+if (!ids.has(1) || !ids.has(2)) {
+  console.error('Expected context expansion to include call and import targets.');
+  process.exit(1);
+}
+
+console.log('context expansion test passed');
diff --git a/tests/core-api.js b/tests/core-api.js
new file mode 100644
index 000000000..c709737d1
--- /dev/null
+++ b/tests/core-api.js
@@ -0,0 +1,50 @@
+#!/usr/bin/env node
+import fs from 'node:fs';
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { buildIndex, search, status } from '../src/integrations/core/index.js';
+import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js';
+
+const root = process.cwd();
+const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample');
+const cacheRoot = path.join(root, 'tests', '.cache', 'core-api');
+
+if (!fs.existsSync(fixtureRoot)) {
+  console.error(`Fixture not found: ${fixtureRoot}`);
+  process.exit(1);
+}
+
+await fsPromises.rm(cacheRoot, { recursive: true, force: true });
+await fsPromises.mkdir(cacheRoot, { recursive: true });
+
+process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot;
+process.env.PAIROFCLEATS_EMBEDDINGS = 'stub';
+
+await buildIndex(fixtureRoot, {
+  mode: 'code',
+  sqlite: false,
+  stubEmbeddings: true,
+  log: () => {}
+});
+
+const userConfig = loadUserConfig(fixtureRoot);
+const indexDir = getIndexDir(fixtureRoot, 'code', userConfig);
+const chunkPath = path.join(indexDir, 'chunk_meta.json');
+if (!fs.existsSync(chunkPath)) {
+  console.error(`Core API test failed: missing ${chunkPath}`);
+  process.exit(1);
+}
+
+const searchPayload = await search(fixtureRoot, { query: 'index', mode: 'code', json: true });
+if (!searchPayload || !Array.isArray(searchPayload.code)) {
+  console.error('Core API test failed: search payload missing code results.');
+  process.exit(1);
+}
+
+const statusPayload = await status(fixtureRoot);
+if (!statusPayload?.repo?.root) {
+  console.error('Core API test failed: status payload missing repo root.');
+  process.exit(1);
+}
+
+console.log('core api test passed');
diff --git a/tests/ctags-ingest.js b/tests/ctags-ingest.js
new file mode 100644
index 000000000..262a721cb
--- /dev/null
+++ b/tests/ctags-ingest.js
@@ -0,0 +1,47 @@
+#!/usr/bin/env node
+import assert from 'node:assert/strict';
+import fs from 'node:fs';
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+
+const root = process.cwd();
+const tempRoot = path.join(root, 'tests', '.cache', 'ctags-ingest');
+const repoRoot = path.join(root, 'tests', 'fixtures', 'sample');
+const inputPath = path.join(root, 'tests', 'fixtures', 'ctags', 'tags.jsonl');
+const outPath = path.join(tempRoot, 'ctags.jsonl');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(tempRoot, { recursive: true });
+
+const result = spawnSync(
+  process.execPath,
+  [path.join(root, 'tools', 'ctags-ingest.js'), '--repo', repoRoot, '--input', inputPath, '--out', outPath, '--json'],
+  { encoding: 'utf8' }
+);
+if (result.status !== 0) {
+  console.error(result.stderr || result.stdout || 'ctags-ingest failed');
+  process.exit(result.status ?? 1);
+}
+
+if (!fs.existsSync(outPath)) {
+  console.error('ctags output not found');
+  process.exit(1);
+}
+
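+// Spot-check the first normalized entry and the sidecar stats metadata.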
+const lines = fs.readFileSync(outPath, 'utf8').trim().split(/\r?\n/).filter(Boolean);
+assert.ok(lines.length >= 2, 'expected ctags output lines');
+
+const first = JSON.parse(lines[0]);
+assert.equal(first.file, 'src/widget.js');
+assert.equal(first.name, 'Widget');
+assert.equal(first.kind, 'class');
+assert.equal(first.language, 'JavaScript');
+assert.equal(first.startLine, 3);
+
+const metaPath = `${outPath}.meta.json`;
+const meta = JSON.parse(fs.readFileSync(metaPath, 'utf8'));
+assert.equal(meta.stats.entries, lines.length);
+
+console.log('ctags ingest test passed');
diff --git a/tests/dict-adaptive.js b/tests/dict-adaptive.js
new file mode 100644
index 000000000..d29171654
--- /dev/null
+++ b/tests/dict-adaptive.js
@@ -0,0 +1,27 @@
+#!/usr/bin/env node
+import { applyAdaptiveDictConfig } from '../tools/dict-utils.js';
+
+const base = {
+  segmentation: 'auto',
+  dpMaxTokenLength: 32,
+  dpMaxTokenLengthByFileCount: [
+    { maxFiles: 5000, dpMaxTokenLength: 32 },
+    { maxFiles: 20000, dpMaxTokenLength: 24 },
+    { maxFiles: 999999, dpMaxTokenLength: 16 }
+  ]
+};
+
+const expect = (actual, expected, label) => {
+  if (actual !== expected) {
+    console.error(`dict adaptive test failed (${label}): expected ${expected}, got ${actual}`);
+    process.exit(1);
+  }
+};
+
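+// Each file-count tier clamps dpMaxTokenLength; an explicit greedy config is left alone.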
+expect(applyAdaptiveDictConfig(base, 100).dpMaxTokenLength, 32, 'small repo');
+expect(applyAdaptiveDictConfig(base, 12000).dpMaxTokenLength, 24, 'mid repo');
+expect(applyAdaptiveDictConfig(base, 80000).dpMaxTokenLength, 16, 'large repo');
+expect(applyAdaptiveDictConfig({ segmentation: 'greedy', dpMaxTokenLength: 12 }, 50000).dpMaxTokenLength, 12, 'greedy override');
+
+console.log('dictionary adaptive config test passed');
diff --git a/tests/discover.js b/tests/discover.js
new file mode 100644
index 000000000..e8101331e
--- /dev/null
+++ b/tests/discover.js
@@ -0,0 +1,99 @@
+import assert from 'node:assert/strict';
+import fs from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+import { discoverFiles, discoverFilesForModes } from '../src/index/build/discover.js';
+import { buildIgnoreMatcher } from '../src/index/build/ignore.js';
+
+const root = process.cwd();
+const tempRoot = path.join(root, 'tests', '.cache', 'discover');
+
+await fs.rm(tempRoot, { recursive: true, force: true });
+await fs.mkdir(path.join(tempRoot, 'src'), { recursive: true });
+await fs.mkdir(path.join(tempRoot, 'docs'), { recursive: true });
+await fs.mkdir(path.join(tempRoot, 'src', 'deep', 'nested'), { recursive: true });
+
+const gitCheck = spawnSync('git', ['--version'], { encoding: 'utf8' });
+if (gitCheck.status !== 0) {
+  console.log('skip: git not available');
+  process.exit(0);
+}
+
+const runGit = (args) => {
+  const result = spawnSync('git', args, { cwd: tempRoot, encoding: 'utf8' });
+  if (result.status !== 0) {
+    throw new Error(`git ${args.join(' ')} failed: ${result.stderr || result.stdout}`);
+  }
+};
+
+runGit(['init']);
+runGit(['config', 'user.email', 'tests@example.com']);
+runGit(['config', 'user.name', 'Tests']);
+
+await fs.writeFile(path.join(tempRoot, 'src', 'app.js'), 'console.log("hi")\n');
+await fs.writeFile(path.join(tempRoot, 'src', 'deep', 'nested', 'too-deep.js'), 'console.log("deep")\n');
+await fs.writeFile(path.join(tempRoot, 'docs', 'readme.md'), '# Hello\n');
+await fs.writeFile(path.join(tempRoot, 'Dockerfile.dev'), 'FROM node:20\n');
+await fs.writeFile(path.join(tempRoot, 'Makefile.in'), 'build:\n\t@echo ok\n');
+runGit(['add', '.']);
+runGit(['commit', '-m', 'init']);
+
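+// Written after the commit, so git-based discovery must not surface it.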
+await fs.writeFile(path.join(tempRoot, 'src', 'untracked.js'), 'console.log("no")\n');
+
+const { ignoreMatcher } = await buildIgnoreMatcher({ root: tempRoot, userConfig: {} });
+
+const skipped = [];
+const codeEntries = await discoverFiles({
+  root: tempRoot,
+  mode: 'code',
+  ignoreMatcher,
+  skippedFiles: skipped,
+  maxFileBytes: null
+});
+const codeRel = codeEntries.map((entry) => entry.rel);
+assert.ok(codeRel.includes('src/app.js'), 'tracked code file missing');
+assert.ok(codeRel.includes('Dockerfile.dev'), 'Dockerfile variant missing');
+assert.ok(codeRel.includes('Makefile.in'), 'Makefile variant missing');
+assert.ok(!codeRel.includes('src/untracked.js'), 'untracked file should not be discovered');
+assert.ok(codeEntries[0].stat && typeof codeEntries[0].stat.size === 'number', 'stat missing');
+
+const depthSkipped = [];
+const depthLimited = await discoverFiles({
+  root: tempRoot,
+  mode: 'code',
+  ignoreMatcher,
+  skippedFiles: depthSkipped,
+  maxFileBytes: null,
+  maxDepth: 1
+});
+assert.ok(!depthLimited.some((entry) => entry.rel.includes('deep/nested')), 'maxDepth should skip deep files');
+assert.ok(depthSkipped.some((entry) => entry.reason === 'max-depth'), 'maxDepth skip reason missing');
+
+const countSkipped = [];
+const countLimited = await discoverFiles({
+  root: tempRoot,
+  mode: 'code',
+  ignoreMatcher,
+  skippedFiles: countSkipped,
+  maxFileBytes: null,
+  maxFiles: 1
+});
+assert.ok(countLimited.length <= 1, 'maxFiles should cap entries');
+assert.ok(countSkipped.some((entry) => entry.reason === 'max-files'), 'maxFiles skip reason missing');
+
+const skippedByMode = { code: [], prose: [] };
+const byMode = await discoverFilesForModes({
+  root: tempRoot,
+  modes: ['code', 'prose'],
+  ignoreMatcher,
+  skippedByMode,
+  maxFileBytes: null
+});
+assert.ok(byMode.code.some((entry) => entry.rel === 'src/app.js'), 'code mode missing app.js');
+assert.ok(byMode.prose.some((entry) => entry.rel === 'docs/readme.md'), 'prose mode missing readme');
+assert.ok(!byMode.code.some((entry) => entry.rel === 'src/untracked.js'), 'untracked file should not appear');
+assert.ok(byMode.code.every((entry) => entry.stat), 'code entries missing stat');
+assert.ok(byMode.prose.every((entry) => entry.stat), 'prose entries missing stat');
+
+console.log('discover test passed');
diff --git a/tests/docs-consistency.js b/tests/docs-consistency.js
deleted file mode 100644
index 918aeada7..000000000
--- a/tests/docs-consistency.js
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env node
-import fs from 'node:fs';
-import path from 'node:path';
-
-const root = process.cwd();
-const roadmapPath = path.join(root, 'ROADMAP.md');
-const planPath = path.join(root, 'COMPLETE_PLAN.md');
-const readmePath = path.join(root, 'README.md');
-
-const failures = [];
-const roadmap = fs.existsSync(roadmapPath) ? fs.readFileSync(roadmapPath, 'utf8') : '';
-const plan = fs.existsSync(planPath) ? fs.readFileSync(planPath, 'utf8') : '';
-const readme = fs.existsSync(readmePath) ? fs.readFileSync(readmePath, 'utf8') : '';
-
-if (!plan) failures.push('COMPLETE_PLAN.md missing or empty.');
-if (!roadmap) failures.push('ROADMAP.md missing or empty.');
-if (roadmap && !roadmap.toLowerCase().includes('historical')) {
-  failures.push('ROADMAP.md should be marked as historical.');
-}
-if (roadmap && !roadmap.includes('COMPLETE_PLAN.md')) {
-  failures.push('ROADMAP.md should reference COMPLETE_PLAN.md as the source of truth.');
-}
-if (readme && !readme.includes('COMPLETE_PLAN.md')) {
-  failures.push('README.md should reference COMPLETE_PLAN.md.');
-}
-
-if (failures.length) {
-  failures.forEach((msg) => console.error(msg));
-  process.exit(1);
-}
-
-console.log('Docs consistency test passed');
diff --git a/tests/download-dicts.js b/tests/download-dicts.js
index c3e99f595..84a50b035 100644
--- a/tests/download-dicts.js
+++ b/tests/download-dicts.js
@@ -1,6 +1,7 @@
 #!/usr/bin/env node
 import fs from 'node:fs';
 import fsPromises from 'node:fs/promises';
+import crypto from 'node:crypto';
 import http from 'node:http';
 import path from 'node:path';
 import { spawn } from 'node:child_process';
@@ -17,6 +18,9 @@ if (!fs.existsSync(sourceFile)) {
   console.error(`Missing fixture: ${sourceFile}`);
   process.exit(1);
 }
+const sourceHash = crypto.createHash('sha256')
+  .update(await fsPromises.readFile(sourceFile))
+  .digest('hex');
 
 const server = http.createServer((req, res) => {
   const filePath = sourceFile;
@@ -60,6 +64,8 @@ const result = await run(
   path.join(root, 'tools', 'download-dicts.js'),
   '--url',
   `test=${url}`,
+  '--sha256',
+  `test=${sourceHash}`,
   '--lang',
   'test',
   '--dir',
@@ -98,5 +104,9 @@ if (!manifest.test || manifest.test.url !== url || manifest.test.file !== 'test.
   console.error('download-dicts test failed: manifest entry mismatch.');
   process.exit(1);
 }
+if (manifest.test.sha256 !== sourceHash || manifest.test.verified !== true) {
+  console.error('download-dicts test failed: hash verification missing.');
+  process.exit(1);
+}
 
 console.log('download-dicts test passed');
diff --git a/tests/download-extensions.js b/tests/download-extensions.js
index 722801f52..a397f2cba 100644
--- a/tests/download-extensions.js
+++ b/tests/download-extensions.js
@@ -1,6 +1,7 @@
 #!/usr/bin/env node
 import fs from 'node:fs';
 import fsPromises from 'node:fs/promises';
+import crypto from 'node:crypto';
 import http from 'node:http';
 import path from 'node:path';
 import { spawn, spawnSync } from 'node:child_process';
@@ -37,6 +38,11 @@ function runDownload(args) {
   });
 }
 
+async function hashFile(filePath) {
+  const buffer = await fsPromises.readFile(filePath);
+  return crypto.createHash('sha256').update(buffer).digest('hex');
+}
+
 await new Promise((resolve) => server.listen(0, '127.0.0.1', resolve));
 const address = server.address();
 const port = typeof address === 'object' && address ? address.port : 0;
@@ -45,15 +51,22 @@ const cases = [
   { label: 'zip', archive: 'vec0.zip', expectedArchive: 'zip' },
   { label: 'tar', archive: 'vec0.tar', expectedArchive: 'tar' }
 ];
+const maliciousCases = [
+  { label: 'zip-slip', archive: 'vec0-slip.zip', escapeName: 'pwned-zip.txt' },
+  { label: 'tar-slip', archive: 'vec0-slip.tar', escapeName: 'pwned-tar.txt' }
+];
 
 const failures = [];
 
 for (const entry of cases) {
   const extensionDir = path.join(tempRoot, entry.label);
   const url = `http://127.0.0.1:${port}/${entry.archive}`;
+  const archiveHash = await hashFile(path.join(fixturesRoot, entry.archive));
   const status = await runDownload([
     path.join(root, 'tools', 'download-extensions.js'),
     '--url',
     `vec0=${url}`,
+    '--sha256',
+    `vec0=${archiveHash}`,
     '--dir',
     extensionDir,
     '--provider',
@@ -98,6 +111,9 @@ for (const entry of cases) {
   if (!record.extractedFrom) {
     failures.push(`${entry.label} manifest extractedFrom missing`);
   }
+  if (record.sha256 !== archiveHash || record.verified !== true) {
+    failures.push(`${entry.label} manifest hash verification missing`);
+  }
 
   const verify = spawnSync(
     process.execPath,
@@ -126,6 +142,38 @@ for (const entry of cases) {
   }
 }
 
+for (const entry of maliciousCases) {
+  const extensionDir = path.join(tempRoot, entry.label);
+  const url = `http://127.0.0.1:${port}/${entry.archive}`;
+  const archiveHash = await hashFile(path.join(fixturesRoot, entry.archive));
+  const escapePath = path.join(tempRoot, entry.escapeName);
+  await fsPromises.rm(escapePath, { force: true });
+  await runDownload([
+    path.join(root, 'tools', 'download-extensions.js'),
+    '--url',
+    `vec0=${url}`,
+    '--sha256',
+    `vec0=${archiveHash}`,
+    '--dir',
+    extensionDir,
+    '--provider',
+    'sqlite-vec',
+    '--platform',
+    'win32',
+    '--arch',
+    'x64',
+    '--force'
+  ]);
+  const expectedPath = path.join(extensionDir, 'sqlite-vec', 'win32-x64', 'vec0.dll');
+  if (fs.existsSync(escapePath)) {
+    failures.push(`${entry.label} wrote outside extraction root`);
+    continue;
+  }
+  if (fs.existsSync(expectedPath)) {
+    failures.push(`${entry.label} unexpectedly extracted binary`);
+  }
+}
+
 server.close();
 
 if (failures.length) {
diff --git a/tests/editor-parity.js b/tests/editor-parity.js
new file mode 100644
index 000000000..329a5ec24
--- /dev/null
+++ b/tests/editor-parity.js
@@ -0,0 +1,102 @@
+#!/usr/bin/env node
+import fs from 'node:fs';
+import path from 'node:path';
+import { DEFAULT_LIMITS } from '../src/map/constants.js';
+
+const root = process.cwd();
+
+const readJson = (filePath, label) => {
+  try {
+    return JSON.parse(fs.readFileSync(filePath, 'utf8'));
+  } catch (err) {
+    console.error(`Failed: ${label} invalid JSON (${filePath})`);
+    console.error(String(err?.message || err));
+    process.exit(1);
+  }
+};
+
+const vscodePackagePath = path.join(root, 'extensions', 'vscode', 'package.json');
+const vscodePackage = readJson(vscodePackagePath, 'vscode extension manifest');
+const vscodeConfig = vscodePackage?.contributes?.configuration?.properties || {};
+
+const getVsCodeDefault = (key) => vscodeConfig?.[key]?.default;
+
+const sublimeSettingsPath = path.join(
+  root,
+  'sublime',
+  'PairOfCleats',
+  'PairOfCleats.sublime-settings'
+);
+const sublimeSettings = readJson(sublimeSettingsPath, 'sublime settings');
+
+const sublimeCommandsPath = path.join(
+  root,
+  'sublime',
+  'PairOfCleats',
+  'Default.sublime-commands'
+);
+const sublimeCommands = readJson(sublimeCommandsPath, 'sublime command palette');
+
+const requiredVsCodeKeys = [
+  'pairofcleats.cliPath',
+  'pairofcleats.searchMode',
+  'pairofcleats.searchBackend',
+  'pairofcleats.searchAnn',
+  'pairofcleats.maxResults'
+];
+
+for (const key of requiredVsCodeKeys) {
+  if (!(key in vscodeConfig)) {
+    console.error(`Failed: VSCode extension missing configuration property: ${key}`);
+    process.exit(1);
+  }
+}
+
+const requiredSublimeCommands = [
+  'pair_of_cleats_search',
+  'pair_of_cleats_search_selection',
+  'pair_of_cleats_index_build_all',
+  'pair_of_cleats_map_repo',
+  'pair_of_cleats_map_current_file',
+  'pair_of_cleats_map_jump_to_node'
+];
+
+if (!Array.isArray(sublimeCommands)) {
+  console.error('Failed: Sublime Default.sublime-commands is not a JSON array');
+  process.exit(1);
+}
+
+const sublimeCommandSet = new Set(sublimeCommands.map((entry) => entry?.command).filter(Boolean));
+for (const command of requiredSublimeCommands) {
+  if (!sublimeCommandSet.has(command)) {
+    console.error(`Failed: Sublime command palette missing command: ${command}`);
+    process.exit(1);
+  }
+}
+
+const ensureEqual = (label, actual, expected) => {
+  if (actual !== expected) {
+    console.error(`Failed: ${label} expected ${JSON.stringify(expected)} but saw ${JSON.stringify(actual)}`);
+    process.exit(1);
+  }
+};
+
+// Search defaults parity (Sublime ↔ VSCode).
+ensureEqual('search limit parity', sublimeSettings.search_limit, getVsCodeDefault('pairofcleats.maxResults'));
+ensureEqual('search mode parity', sublimeSettings.index_mode_default, getVsCodeDefault('pairofcleats.searchMode'));
+ensureEqual(
+  'search backend parity',
+  sublimeSettings.search_backend_default,
+  getVsCodeDefault('pairofcleats.searchBackend')
+);
+
+// Guardrail parity (Sublime ↔ CLI defaults).
+ensureEqual('map max files parity', sublimeSettings.map_max_files, DEFAULT_LIMITS.maxFiles);
+ensureEqual(
+  'map max members per file parity',
+  sublimeSettings.map_max_members_per_file,
+  DEFAULT_LIMITS.maxMembersPerFile
+);
+ensureEqual('map max edges parity', sublimeSettings.map_max_edges, DEFAULT_LIMITS.maxEdges);
+
+console.log('editor parity checklist tests passed');
diff --git a/tests/embedding-batch-autotune.js b/tests/embedding-batch-autotune.js
new file mode 100644
index 000000000..7587e78c1
--- /dev/null
+++ b/tests/embedding-batch-autotune.js
@@ -0,0 +1,39 @@
+#!/usr/bin/env node
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { parseBuildArgs } from '../src/index/build/args.js';
+import { createBuildRuntime } from '../src/index/build/runtime.js';
+
+const root = process.cwd();
+const tempRoot = path.join(root, 'tests', '.cache', 'embedding-batch-autotune');
+const repoRoot = path.join(tempRoot, 'repo');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(repoRoot, { recursive: true });
+process.env.PAIROFCLEATS_CACHE_ROOT = tempRoot;
+
+await fsPromises.writeFile(
+  path.join(repoRoot, '.pairofcleats.json'),
+  JSON.stringify({
+    indexing: {
+      embeddings: { enabled: true, mode: 'stub' },
+      treeSitter: { enabled: false }
+    }
+  }, null, 2)
+);
+
+const defaults = parseBuildArgs([]).argv;
+const argv = { ...defaults, 'stub-embeddings': true };
+const runtime = await createBuildRuntime({ root: repoRoot, argv, rawArgv: [] });
+
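+// Auto-tuned values should land inside a sane window rather than on one exact number.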
+if (runtime.embeddingBatchSize < 32 || runtime.embeddingBatchSize > 128) {
+  console.error(`Unexpected embedding batch size: ${runtime.embeddingBatchSize}`);
+  process.exit(1);
+}
+if (runtime.embeddingConcurrency < 1) {
+  console.error(`Unexpected embedding concurrency: ${runtime.embeddingConcurrency}`);
+  process.exit(1);
+}
+
+console.log('embedding auto-tune test passed');
diff --git a/tests/embedding-batch-multipliers.js b/tests/embedding-batch-multipliers.js
new file mode 100644
index 000000000..a6cb4896b
--- /dev/null
+++ b/tests/embedding-batch-multipliers.js
@@ -0,0 +1,18 @@
+#!/usr/bin/env node
+import { normalizeEmbeddingBatchMultipliers, resolveEmbeddingBatchSize } from '../src/index/build/embedding-batch.js';
+
+const multipliers = normalizeEmbeddingBatchMultipliers({ typescript: 4, python: 2 }, { typescript: 3, rust: 1.5 });
+
+const expect = (label, actual, expected) => {
+  if (actual !== expected) {
+    console.error(`embedding batch multiplier failed (${label}): ${actual} !== ${expected}`);
+    process.exit(1);
+  }
+};
+
+expect('typescript', resolveEmbeddingBatchSize(10, 'typescript', multipliers), 40);
+expect('python', resolveEmbeddingBatchSize(10, 'python', multipliers), 20);
+expect('rust fallback', resolveEmbeddingBatchSize(10, 'rust', multipliers), 15);
+expect('unknown', resolveEmbeddingBatchSize(10, 'go', multipliers), 10);
+
+console.log('embedding batch multiplier test passed');
diff --git a/tests/embedding-provider-strict.js b/tests/embedding-provider-strict.js
new file mode 100644
index 000000000..5fc9f99d1
--- /dev/null
+++ b/tests/embedding-provider-strict.js
@@ -0,0 +1,18 @@
+#!/usr/bin/env node
+import assert from 'node:assert/strict';
+
+import { normalizeEmbeddingProvider } from '../src/shared/onnx-embeddings.js';
+
+assert.equal(normalizeEmbeddingProvider(undefined), 'xenova');
+assert.equal(normalizeEmbeddingProvider(' '), 'xenova');
+assert.equal(normalizeEmbeddingProvider('TRANSFORMERS'), 'xenova');
+assert.equal(normalizeEmbeddingProvider('onnxruntime-node'), 'onnx');
+assert.equal(normalizeEmbeddingProvider('xenova'), 'xenova');
+
+assert.throws(
+  () => normalizeEmbeddingProvider('provider-a'),
+  /Unknown embedding provider/i,
+  'expected unknown provider to throw rather than silently falling back'
+);
+
+console.log('embedding provider strict validation test passed');
diff --git a/tests/embeddings-cache-identity.js b/tests/embeddings-cache-identity.js
new file mode 100644
index 000000000..d0a0ed6be
--- /dev/null
+++ b/tests/embeddings-cache-identity.js
@@ -0,0 +1,100 @@
+#!/usr/bin/env node
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+import { getRepoCacheRoot, loadUserConfig } from '../tools/dict-utils.js';
+
+const root = process.cwd();
+const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample');
+const tempRoot = path.join(root, 'tests', '.cache', 'embeddings-cache-identity');
+const repoRoot = path.join(tempRoot, 'repo');
+const cacheRoot = path.join(tempRoot, 'cache');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(tempRoot, { recursive: true });
+await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true });
+
+const env = {
+  ...process.env,
+  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
+  PAIROFCLEATS_EMBEDDINGS: 'stub'
+};
+process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot;
+process.env.PAIROFCLEATS_EMBEDDINGS = 'stub';
+
+const buildIndex = spawnSync(
+  process.execPath,
+  [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot],
+  { cwd: repoRoot, env, stdio: 'inherit' }
+);
+if (buildIndex.status !== 0) {
+  console.error('embeddings cache identity test failed: build_index failed');
+  process.exit(buildIndex.status ?? 1);
+}
+
+const runEmbeddings = (dims) => {
+  const result = spawnSync(
+    process.execPath,
+    [
+      path.join(root, 'tools', 'build-embeddings.js'),
+      '--stub-embeddings',
+      '--mode',
+      'code',
+      '--dims',
+      String(dims),
+      '--repo',
+      repoRoot
+    ],
+    { cwd: repoRoot, env, stdio: 'inherit' }
+  );
+  if (result.status !== 0) {
+    console.error(`embeddings cache identity test failed: build-embeddings dims=${dims} failed`);
+    process.exit(result.status ?? 1);
+  }
+};
+
+runEmbeddings(8);
+
+const userConfig = loadUserConfig(repoRoot);
+const repoCacheRoot = getRepoCacheRoot(repoRoot, userConfig);
+const cacheDir = path.join(repoCacheRoot, 'embeddings', 'code', 'files');
+const firstFiles = (await fsPromises.readdir(cacheDir))
+  .filter((name) => name.endsWith('.json'));
+if (!firstFiles.length) {
+  console.error('embeddings cache identity test failed: missing cache files');
+  process.exit(1);
+}
+
+const firstCache = JSON.parse(
+  await fsPromises.readFile(path.join(cacheDir, firstFiles[0]), 'utf8')
+);
+const meta = firstCache?.cacheMeta?.identity;
+if (!meta) {
+  console.error('embeddings cache identity test failed: missing cache metadata');
+  process.exit(1);
+}
+if (meta.dims !== 8 || meta.scale !== 2 / 255 || meta.stub !== true) {
+  console.error('embeddings cache identity test failed: cache identity did not include expected dims/scale/stub');
+  process.exit(1);
+}
+if (!meta.modelId || typeof meta.modelId !== 'string') {
+  console.error('embeddings cache identity test failed: cache identity missing modelId');
+  process.exit(1);
+}
+if (!meta.provider || typeof meta.provider !== 'string') {
+  console.error('embeddings cache identity test failed: cache identity missing provider');
+  process.exit(1);
+}
+
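+// Rebuilding with different dims must mint new cache entries instead of reusing old ones.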
+runEmbeddings(12);
+const secondFiles = (await fsPromises.readdir(cacheDir))
+  .filter((name) => name.endsWith('.json'));
+const firstSet = new Set(firstFiles);
+const hasNew = secondFiles.some((name) => !firstSet.has(name));
+if (!hasNew) {
+  console.error('embeddings cache identity test failed: expected new cache entries after dims change');
+  process.exit(1);
+}
+
+console.log('embeddings cache identity tests passed');
diff --git a/tests/embeddings-cache-invalidation.js b/tests/embeddings-cache-invalidation.js
new file mode 100644
index 000000000..81aea820b
--- /dev/null
+++ b/tests/embeddings-cache-invalidation.js
@@ -0,0 +1,100 @@
+#!/usr/bin/env node
+import assert from 'node:assert/strict';
+import { buildCacheIdentity, buildCacheKey, isCacheValid } from '../tools/build-embeddings/cache.js';
+
+const base = buildCacheIdentity({
+  modelId: 'model-a',
+  provider: 'provider-a',
+  mode: 'inline',
+  stub: false,
+  dims: 384,
+  scale: 0.5
+});
+const dimsChanged = buildCacheIdentity({
+  modelId: 'model-a',
+  provider: 'provider-a',
+  mode: 'inline',
+  stub: false,
+  dims: 768,
+  scale: 0.5
+});
+const modelChanged = buildCacheIdentity({
+  modelId: 'model-b',
+  provider: 'provider-a',
+  mode: 'inline',
+  stub: false,
+  dims: 384,
+  scale: 0.5
+});
+const providerChanged = buildCacheIdentity({
+  modelId: 'model-a',
+  provider: 'provider-b',
+  mode: 'inline',
+  stub: false,
+  dims: 384,
+  scale: 0.5
+});
+
+assert.notEqual(base.key, dimsChanged.key, 'expected cache identity to change with dims');
+assert.notEqual(base.key, modelChanged.key, 'expected cache identity to change with model');
+assert.notEqual(base.key, providerChanged.key, 'expected cache identity to change with provider');
+
+// Provider-specific knobs should participate in cache invalidation.
+const onnxBase = buildCacheIdentity({
+  modelId: 'model-a',
+  provider: 'onnx',
+  mode: 'inline',
+  stub: false,
+  dims: 384,
+  scale: 0.5,
+  onnx: {
+    modelPath: '/models/model-a.onnx',
+    tokenizerId: 'model-a',
+    executionProviders: ['cpu'],
+    intraOpNumThreads: 1,
+    interOpNumThreads: 1,
+    graphOptimizationLevel: 'all'
+  }
+});
+const onnxModelPathChanged = buildCacheIdentity({
+  modelId: 'model-a',
+  provider: 'onnx',
+  mode: 'inline',
+  stub: false,
+  dims: 384,
+  scale: 0.5,
+  onnx: {
+    modelPath: '/models/other.onnx',
+    tokenizerId: 'model-a',
+    executionProviders: ['cpu'],
+    intraOpNumThreads: 1,
+    interOpNumThreads: 1,
+    graphOptimizationLevel: 'all'
+  }
+});
+assert.notEqual(onnxBase.key, onnxModelPathChanged.key, 'expected cache identity to change with onnx modelPath');
+
+const signature = 'sig-1';
+const cached = {
+  chunkSignature: signature,
+  cacheMeta: { identityKey: base.key }
+};
+assert.equal(isCacheValid({ cached, signature, identityKey: base.key }), true, 'expected cache to be valid for matching identity');
+assert.equal(isCacheValid({ cached, signature, identityKey: dimsChanged.key }), false, 'expected cache to be invalid for mismatched identity');
+
+const cacheKey = buildCacheKey({
+  file: 'src/index.js',
+  hash: 'hash-1',
+  signature,
+  identityKey: base.key
+});
+assert.ok(cacheKey, 'expected cache key for hashed file');
+const cacheKeyMismatch = buildCacheKey({
+  file: 'src/index.js',
+  hash: 'hash-1',
+  signature,
+  identityKey: dimsChanged.key
+});
+assert.notEqual(cacheKey, cacheKeyMismatch, 'expected cache key to change with identity');
+
+console.log('embeddings cache invalidation test passed');
diff --git a/tests/embeddings-dims-mismatch.js b/tests/embeddings-dims-mismatch.js
new file mode 100644
index 000000000..dae690f7a
--- /dev/null
+++ b/tests/embeddings-dims-mismatch.js
@@ -0,0 +1,86 @@
+#!/usr/bin/env node
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+import { getRepoCacheRoot, loadUserConfig } from '../tools/dict-utils.js';
+
+const root = process.cwd();
+const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample');
+const tempRoot = path.join(root, 'tests', '.cache', 'embeddings-dims-mismatch');
+const repoRoot = path.join(tempRoot, 'repo');
+const cacheRoot = path.join(tempRoot, 'cache');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(tempRoot, { recursive: true });
+await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true });
+
+const env = {
+  ...process.env,
+  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
+  PAIROFCLEATS_EMBEDDINGS: 'stub'
+};
+process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot;
+process.env.PAIROFCLEATS_EMBEDDINGS = 'stub';
+
+const buildIndex = spawnSync(
+  process.execPath,
+  [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot],
+  { cwd: repoRoot, env, stdio: 'inherit' }
+);
+if (buildIndex.status !== 0) {
+  console.error('embeddings dims mismatch test failed: build_index failed');
+  process.exit(buildIndex.status ?? 1);
+}
+
+const runEmbeddings = () => spawnSync(
+  process.execPath,
+  [
+    path.join(root, 'tools', 'build-embeddings.js'),
+    '--stub-embeddings',
+    '--mode',
+    'code',
+    '--dims',
+    '8',
+    '--repo',
+    repoRoot
+  ],
+  { cwd: repoRoot, env, encoding: 'utf8' }
+);
+
+const firstRun = runEmbeddings();
+if (firstRun.status !== 0) {
+  console.error('embeddings dims mismatch test failed: initial build-embeddings failed');
+  process.exit(firstRun.status ?? 1);
+}
+
+const userConfig = loadUserConfig(repoRoot);
+const repoCacheRoot = getRepoCacheRoot(repoRoot, userConfig);
+const cacheDir = path.join(repoCacheRoot, 'embeddings', 'code', 'files');
+const cacheFiles = (await fsPromises.readdir(cacheDir)).filter((name) => name.endsWith('.json'));
+if (!cacheFiles.length) {
+  console.error('embeddings dims mismatch test failed: no cache files found');
+  process.exit(1);
+}
+
+const targetPath = path.join(cacheDir, cacheFiles[0]);
+const cached = JSON.parse(await fsPromises.readFile(targetPath, 'utf8'));
+const bumpVector = (vec) => {
+  if (Array.isArray(vec)) vec.push(0);
+};
+bumpVector(cached?.mergedVectors?.[0]);
+bumpVector(cached?.codeVectors?.[0]);
+bumpVector(cached?.docVectors?.[0]);
+await fsPromises.writeFile(targetPath, JSON.stringify(cached));
+
+const secondRun = runEmbeddings();
+if (secondRun.status === 0) {
+  console.error('embeddings dims mismatch test failed: expected dims mismatch error');
+  process.exit(1);
+}
+const output = `${secondRun.stdout || ''}${secondRun.stderr || ''}`;
+if (!output.includes('embedding dims mismatch')) {
+  console.error('embeddings dims mismatch test failed: missing mismatch error message');
+  process.exit(1);
+}
+
+console.log('embeddings dims mismatch tests passed');
diff --git a/tests/embeddings-dims-validation.js b/tests/embeddings-dims-validation.js
new file mode 100644
index 000000000..5cc270e4f
--- /dev/null
+++ b/tests/embeddings-dims-validation.js
@@ -0,0 +1,21 @@
+#!/usr/bin/env node
+import assert from 'node:assert/strict';
+import { createDimsValidator, isDimsMismatch, validateCachedDims } from '../tools/build-embeddings/embed.js';
+
+const validator = createDimsValidator({ mode: 'code', configuredDims: 4 });
+validator.assertDims(4);
+assert.throws(() => validator.assertDims(5), /embedding dims mismatch/, 'expected configured dims mismatch to throw');
+
+const cachedOk = [[0, 1, 2, 3], [4, 5, 6, 7]];
+validateCachedDims({ vectors: cachedOk, expectedDims: 4, mode: 'code' });
+
+assert.throws(
+  () => validateCachedDims({ vectors: [[0, 1, 2]], expectedDims: 4, mode: 'code' }),
+  /embedding dims mismatch/,
+  'expected cached dims mismatch to throw'
+);
+
+const mismatchError = new Error('[embeddings] code embedding dims mismatch (configured=4, observed=5).');
+assert.equal(isDimsMismatch(mismatchError), true, 'expected dims mismatch error to be detected');
+
+console.log('embeddings dims validation test passed');
diff --git a/tests/embeddings-sqlite-dense.js b/tests/embeddings-sqlite-dense.js
new file mode 100644
index 000000000..fa55be38b
--- /dev/null
+++ b/tests/embeddings-sqlite-dense.js
@@ -0,0 +1,88 @@
+#!/usr/bin/env node
+import assert from 'node:assert/strict';
+import fsPromises from 'node:fs/promises';
+import os from 'node:os';
+import path from 'node:path';
+import { updateSqliteDense } from '../tools/build-embeddings/sqlite-dense.js';
+
+let Database;
+try {
+  ({ default: Database } = await import('better-sqlite3'));
+} catch (err) {
+  console.error('better-sqlite3 is required for embeddings sqlite dense test.');
+  process.exit(1);
+}
+
+const tempRoot = await fsPromises.mkdtemp(path.join(os.tmpdir(), 'pairofcleats-embeddings-sqlite-'));
+const dbPath = path.join(tempRoot, 'index-code.db');
+const dbMissingPath = path.join(tempRoot, 'index-missing.db');
+
+const vectors = [
+  [1, 2, 3],
+  [4, 5, 6]
+];
+
+const createDbWithTables = (target) => {
+  const db = new Database(target);
+  db.exec('CREATE TABLE dense_vectors (mode TEXT, doc_id INTEGER, vector BLOB)');
+  db.exec('CREATE TABLE dense_meta (mode TEXT, dims INTEGER, scale REAL, model TEXT)');
+  db.close();
+};
+
+createDbWithTables(dbPath);
+new Database(dbMissingPath).close();
+
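+// Exercise three paths: sqlite disabled, dense tables missing, and a successful write.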
+const disabledResult = updateSqliteDense({
+  Database,
+  root: tempRoot,
+  userConfig: { sqlite: { use: false } },
+  mode: 'code',
+  vectors,
+  dims: 3,
+  scale: 1,
+  modelId: 'model-a',
+  dbPath,
+  emitOutput: false
+});
+assert.equal(disabledResult.skipped, true, 'expected sqlite update to skip when disabled');
+
+const missingResult = updateSqliteDense({
+  Database,
+  root: tempRoot,
+  userConfig: { sqlite: { use: true } },
+  mode: 'code',
+  vectors,
+  dims: 3,
+  scale: 1,
+  modelId: 'model-a',
+  dbPath: dbMissingPath,
+  emitOutput: false
+});
+assert.equal(missingResult.skipped, true, 'expected sqlite update to skip when tables missing');
+assert.equal(missingResult.reason, 'missing dense tables', 'expected missing dense tables reason');
+
+const enabledResult = updateSqliteDense({
+  Database,
+  root: tempRoot,
+  userConfig: { sqlite: { use: true } },
+  mode: 'code',
+  vectors,
+  dims: 3,
+  scale: 1,
+  modelId: 'model-a',
+  dbPath,
+  emitOutput: false
+});
+assert.equal(enabledResult.skipped, false, 'expected sqlite update to run when enabled');
+
+const db = new Database(dbPath, { readonly: true });
+const denseCount = db.prepare('SELECT COUNT(*) AS total FROM dense_vectors').get().total;
+const metaCount = db.prepare('SELECT COUNT(*) AS total FROM dense_meta').get().total;
+const modeCount = db.prepare('SELECT COUNT(*) AS total FROM dense_vectors WHERE mode = ?').get('code').total;
+db.close();
+assert.equal(denseCount, vectors.length, 'expected dense vectors to be written');
+assert.equal(metaCount, 1, 'expected dense metadata to be written');
+assert.equal(modeCount, vectors.length, 'expected mode-specific dense vectors');
+
+console.log('embeddings sqlite dense test passed');
diff --git a/tests/embeddings-validate.js b/tests/embeddings-validate.js
new file mode 100644
index 000000000..2262e3462
--- /dev/null
+++ b/tests/embeddings-validate.js
@@ -0,0 +1,81 @@
+#!/usr/bin/env node
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js';
+
+const root = process.cwd();
+const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample');
+const cacheRoot = path.join(root, 'tests', '.cache', 'embeddings-validate');
+const env = {
+  ...process.env,
+  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
+  PAIROFCLEATS_EMBEDDINGS: 'stub'
+};
+
+await fsPromises.rm(cacheRoot, { recursive: true, force: true });
+await fsPromises.mkdir(cacheRoot, { recursive: true });
+
+const buildPath = path.join(root, 'build_index.js');
+const embeddingsPath = path.join(root, 'tools', 'build-embeddings.js');
+const validatePath = path.join(root, 'tools', 'index-validate.js');
+
+const run = (args, label) => {
+  const result = spawnSync(process.execPath, args, { env, encoding: 'utf8' });
+  if (result.status !== 0) {
+    console.error(`Failed: ${label}`);
+    if (result.stderr) console.error(result.stderr.trim());
+    process.exit(result.status ?? 1);
+  }
+  return result.stdout || '';
+};
+
+run([buildPath, '--stub-embeddings', '--repo', fixtureRoot], 'build index');
+run([embeddingsPath, '--stub-embeddings', '--repo', fixtureRoot], 'build embeddings');
+
+const validateResult = spawnSync(
+  process.execPath,
+  [validatePath, '--repo', fixtureRoot, '--json'],
+  { env, encoding: 'utf8' }
+);
+if (validateResult.status !== 0) {
+  console.error('Expected index-validate to pass after build-embeddings.');
+  if (validateResult.stderr) console.error(validateResult.stderr.trim());
+  process.exit(validateResult.status ?? 1);
+}
+let payload;
+try {
+  payload = JSON.parse(validateResult.stdout);
+} catch {
+  console.error('index-validate did not return valid JSON.');
+  process.exit(1);
+}
+if (!payload || payload.ok !== true) {
+  console.error('index-validate JSON payload missing ok=true.');
+  process.exit(1);
+}
+
+const previousCacheRoot = process.env.PAIROFCLEATS_CACHE_ROOT;
+process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot;
+const userConfig = loadUserConfig(fixtureRoot);
+const codeDir = getIndexDir(fixtureRoot, 'code', userConfig);
+if (previousCacheRoot === undefined) {
+  delete process.env.PAIROFCLEATS_CACHE_ROOT;
+} else {
+  process.env.PAIROFCLEATS_CACHE_ROOT = previousCacheRoot;
+}
+const statePath = path.join(codeDir, 'index_state.json');
+let state;
+try {
+  state = JSON.parse(await fsPromises.readFile(statePath, 'utf8'));
+} catch {
+  console.error('Failed to read index_state.json after build-embeddings.');
+  process.exit(1);
+}
+const embeddings = state?.embeddings || {};
+if (embeddings.enabled !== true || embeddings.ready !== true || embeddings.pending === true) {
+  console.error('index_state embeddings flags not marked ready after build-embeddings.');
+  process.exit(1);
+}
+
+console.log('Stage3 embeddings validation test passed');
diff --git a/tests/encoding-fallback.js b/tests/encoding-fallback.js
new file mode 100644
index 000000000..d2b1280a3
--- /dev/null
+++ b/tests/encoding-fallback.js
@@ -0,0 +1,65 @@
+#!/usr/bin/env node
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+import { readTextFile } from '../src/shared/encoding.js';
+
+const root = process.cwd();
+const fixtureRoot = path.join(root, 'tests', 'fixtures', 'encoding');
+const cacheRoot = path.join(root, 'tests', '.cache', 'encoding-fallback');
+const sourcePath = path.join(fixtureRoot, 'latin1.js');
+
+await fsPromises.rm(cacheRoot, { recursive: true, force: true });
+await fsPromises.mkdir(cacheRoot, { recursive: true });
+
+const { text, usedFallback } = await readTextFile(sourcePath);
+if (!text.includes('café')) {
+  console.error('Encoding fallback did not decode latin1.js correctly.');
+  process.exit(1);
+}
+if (!usedFallback) {
+  console.error('Expected encoding fallback to be used for latin1.js.');
+  process.exit(1);
+}
+
+const env = {
+  ...process.env,
+  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
+  PAIROFCLEATS_EMBEDDINGS: 'stub',
+  PAIROFCLEATS_WORKER_POOL: 'off'
+};
+
+const buildResult = spawnSync(
+  process.execPath,
+  [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', fixtureRoot],
+  { cwd: fixtureRoot, env, stdio: 'inherit' }
+);
+if (buildResult.status !== 0) {
+  console.error('Failed: build_index');
+  process.exit(buildResult.status ?? 1);
+}
+
+const searchResult = spawnSync(
+  process.execPath,
+  [path.join(root, 'search.js'), '--json', '--repo', fixtureRoot, 'café'],
+  { cwd: fixtureRoot, env, encoding: 'utf8' }
+);
+if (searchResult.status !== 0) {
+  console.error('Failed: search');
+  process.exit(searchResult.status ?? 1);
+}
+let payload = null;
+try {
+  payload = JSON.parse(searchResult.stdout || '{}');
+} catch {
+  console.error('Search output is not valid JSON.');
+  process.exit(1);
+}
+const hits = Array.isArray(payload?.code) ? payload.code : [];
+const hit = hits.find((entry) => typeof entry?.file === 'string' && entry.file.endsWith('latin1.js'));
+if (!hit) {
+  console.error('Expected search hit for latin1.js in encoding fixture.');
+  process.exit(1);
+}
+
+console.log('encoding fallback test passed');
diff --git a/tests/encoding-hash.js b/tests/encoding-hash.js
new file mode 100644
index 000000000..b04a2e80b
--- /dev/null
+++ b/tests/encoding-hash.js
@@ -0,0 +1,29 @@
+#!/usr/bin/env node
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { readTextFileWithHash } from '../src/shared/encoding.js';
+import { sha1 } from '../src/shared/hash.js';
+
+const root = process.cwd();
+const tempRoot = path.join(root, 'tests', '.cache', 'encoding-hash');
+const filePath = path.join(tempRoot, 'latin1.txt');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(tempRoot, { recursive: true });
+
+const buffer = Buffer.from([0xff, 0xfe, 0xfd, 0x41]);
+await fsPromises.writeFile(filePath, buffer);
+
+const info = await readTextFileWithHash(filePath);
+const expectedHash = sha1(buffer);
+
+if (info.hash !== expectedHash) {
+  console.error('encoding hash test failed: hash did not match raw bytes.');
+  process.exit(1);
+}
+if (!info.usedFallback) {
+  console.error('encoding hash test failed: expected fallback decoding for invalid UTF-8.');
+  process.exit(1);
+}
+
+console.log('encoding hash tests passed');
diff --git a/tests/eval-quality.js b/tests/eval-quality.js
new file mode 100644
index 000000000..1a51197d4
--- /dev/null
+++ b/tests/eval-quality.js
@@ -0,0 +1,81 @@
+#!/usr/bin/env node
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+
+const root = process.cwd();
+const tempRoot = path.join(root, 'tests', '.cache', 'eval-quality');
+const cacheRoot = path.join(tempRoot, 'cache');
+const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample');
+const datasetPath = path.join(fixtureRoot, 'eval.json');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(cacheRoot, { recursive: true });
+
+const env = {
+  ...process.env,
+  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
+  PAIROFCLEATS_EMBEDDINGS: 'stub'
+};
+
+const buildResult = spawnSync(
+  process.execPath,
+  [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', fixtureRoot],
+  { env, stdio: 'inherit' }
+);
+if (buildResult.status !== 0) {
+  console.error('eval quality test failed: build_index failed');
+  process.exit(buildResult.status ?? 1);
+}
+
+const evalResult = spawnSync(
+  process.execPath,
+  [
+    path.join(root, 'tools', 'eval', 'run.js'),
+    '--repo',
+    fixtureRoot,
+    '--dataset',
+    datasetPath,
+    '--backend',
+    'memory',
+    '--no-ann',
+    '--top',
+    '5'
+  ],
+  { env, encoding: 'utf8' }
+);
+
+if (evalResult.status !== 0) {
+  console.error('eval quality test failed: eval run returned error');
+  if (evalResult.stderr) console.error(evalResult.stderr.trim());
+  process.exit(evalResult.status ?? 1);
+}
+
+let payload = null;
+try {
+  payload = JSON.parse(evalResult.stdout || '{}');
+} catch (err) {
+  console.error('eval quality test failed: invalid JSON output');
+  process.exit(1);
+}
+
+const summary = payload?.summary || {};
+const recallAt5 = summary?.recallAtK?.['5'] ?? 0;
+const ndcgAt5 = summary?.ndcgAtK?.['5'] ?? 0;
+const mrr = summary?.mrr ?? 0;
+
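+// Hard floors for retrieval quality on the fixture dataset; dipping below any of them fails the run.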
+if (recallAt5 < 0.6) {
+  console.error(`eval quality test failed: recall@5 too low (${recallAt5.toFixed(3)})`);
+  process.exit(1);
+}
+if (ndcgAt5 < 0.6) {
+  console.error(`eval quality test failed: ndcg@5 too low (${ndcgAt5.toFixed(3)})`);
+  process.exit(1);
+}
+if (mrr < 0.5) {
+  console.error(`eval quality test failed: mrr too low (${mrr.toFixed(3)})`);
+  process.exit(1);
+}
+
+console.log('eval quality tests passed');
diff --git a/tests/ext-filter.js b/tests/ext-filter.js
new file mode 100644
index 000000000..7eb0eccd4
--- /dev/null
+++ b/tests/ext-filter.js
@@ -0,0 +1,17 @@
+#!/usr/bin/env node
+import { normalizeExtFilter } from '../src/retrieval/filters.js';
+
+const result = normalizeExtFilter(['*.js', 'JS', '.Md']);
+const expected = ['.js', '.md'];
+
+const sorted = (result || []).slice().sort();
+const expectedSorted = expected.slice().sort();
+
+const sameLength = sorted.length === expectedSorted.length;
+const sameValues = sorted.every((value, idx) => value === expectedSorted[idx]);
+if (!sameLength || !sameValues) {
+  console.error(`normalizeExtFilter failed: expected ${expectedSorted.join(', ')}, got ${sorted.join(', ')}`);
+  process.exit(1);
+}
+
+console.log('ext filter test passed');
diff --git a/tests/external-docs.js b/tests/external-docs.js
new file mode 100644
index 000000000..a913d30f3
--- /dev/null
+++ b/tests/external-docs.js
@@ -0,0 +1,72 @@
+#!/usr/bin/env node
+import fs from 'node:fs';
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { spawnSync } from 'node:child_process';
+import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js';
+
+const root = process.cwd();
+const tempRoot = path.join(root, 'tests', '.cache', 'external-docs');
+const repoRoot = path.join(tempRoot, 'repo');
+const cacheRoot = path.join(tempRoot, 'cache');
+
+await fsPromises.rm(tempRoot, { recursive: true, force: true });
+await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true });
+await fsPromises.mkdir(cacheRoot, { recursive: true });
+
+await fsPromises.writeFile(
+  path.join(repoRoot, 'src', 'index.js'),
+  [
+    "import foo from '@scope/pkg';",
+    "import bar from 'left-pad';",
+    "console.log(foo, bar);"
+  ].join('\n') + '\n'
+);
+
+process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot;
+process.env.PAIROFCLEATS_EMBEDDINGS = 'stub';
+
+const env = {
+  ...process.env,
+  PAIROFCLEATS_CACHE_ROOT: cacheRoot,
+  PAIROFCLEATS_EMBEDDINGS: 'stub'
+};
+
+const buildResult = spawnSync(
+  process.execPath,
+  [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot],
+  { cwd: repoRoot, env, stdio: 'inherit' }
+);
+if (buildResult.status !== 0) {
+  console.error('external docs test failed: build_index failed');
+ process.exit(buildResult.status ?? 1); +} + +const userConfig = loadUserConfig(repoRoot); +const codeDir = getIndexDir(repoRoot, 'code', userConfig); +const fileMetaPath = path.join(codeDir, 'file_meta.json'); +if (!fs.existsSync(fileMetaPath)) { + console.error(`Missing file metadata: ${fileMetaPath}`); + process.exit(1); +} + +const files = JSON.parse(fs.readFileSync(fileMetaPath, 'utf8')); +const expectedScoped = 'https://www.npmjs.com/package/@scope/pkg'; +const expectedUnscoped = 'https://www.npmjs.com/package/left-pad'; +const encodedScoped = 'https://www.npmjs.com/package/%40scope/pkg'; + +const allDocs = files.flatMap((file) => file.externalDocs || []); +if (!allDocs.includes(expectedScoped)) { + console.error(`Missing scoped npm doc link: ${expectedScoped}`); + process.exit(1); +} +if (allDocs.includes(encodedScoped)) { + console.error(`Scoped npm doc link should preserve @: ${encodedScoped}`); + process.exit(1); +} +if (!allDocs.includes(expectedUnscoped)) { + console.error(`Missing npm doc link: ${expectedUnscoped}`); + process.exit(1); +} + +console.log('External docs test passed'); diff --git a/tests/extracted-prose.js b/tests/extracted-prose.js new file mode 100644 index 000000000..b29f5b2f1 --- /dev/null +++ b/tests/extracted-prose.js @@ -0,0 +1,76 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'extracted-prose'); +const repoRoot = path.join(tempRoot, 'repo'); +const srcDir = path.join(repoRoot, 'src'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(srcDir, { recursive: true }); + +const commentText = 'extracted-prose sentinel phrase'; +const source = [ + '/**', + ` * ${commentText}`, + ' */', + 'export function sample() { return 1; }', + '' +].join('\n'); +await fsPromises.writeFile(path.join(srcDir, 'sample.js'), source); + +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify({ sqlite: { use: false } }, null, 2) +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: path.join(tempRoot, 'cache'), + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; +process.env.PAIROFCLEATS_CACHE_ROOT = env.PAIROFCLEATS_CACHE_ROOT; +process.env.PAIROFCLEATS_EMBEDDINGS = env.PAIROFCLEATS_EMBEDDINGS; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--repo', repoRoot, '--mode', 'extracted-prose', '--stub-embeddings'], + { env, encoding: 'utf8' } +); +if (buildResult.status !== 0) { + console.error('Extracted-prose test failed: build_index error.'); + if (buildResult.stderr) console.error(buildResult.stderr.trim()); + process.exit(buildResult.status ?? 1); +} + +const searchResult = spawnSync( + process.execPath, + [path.join(root, 'search.js'), '--repo', repoRoot, '--mode', 'extracted-prose', '--json', commentText], + { env, encoding: 'utf8' } +); +if (searchResult.status !== 0) { + console.error('Extracted-prose test failed: search error.'); + if (searchResult.stderr) console.error(searchResult.stderr.trim()); + process.exit(searchResult.status ?? 1); +} + +let payload; +try { + payload = JSON.parse(searchResult.stdout || '{}'); +} catch (err) { + console.error('Extracted-prose test failed: invalid JSON output.'); + if (searchResult.stdout) console.error(searchResult.stdout.trim()); + process.exit(1); +} + +const hits = Array.isArray(payload.extractedProse) ? 
payload.extractedProse : []; +const matched = hits.some((hit) => hit?.file === 'src/sample.js'); +if (!matched) { + console.error('Extracted-prose test failed: expected hit missing.'); + process.exit(1); +} + +console.log('Extracted-prose test passed.'); diff --git a/tests/fielded-bm25.js b/tests/fielded-bm25.js new file mode 100644 index 000000000..ca0d5a9b0 --- /dev/null +++ b/tests/fielded-bm25.js @@ -0,0 +1,87 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'fielded-bm25'); +const cacheRoot = path.join(tempRoot, 'cache'); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', fixtureRoot], + { env, stdio: 'inherit' } +); +if (buildResult.status !== 0) { + console.error('fielded bm25 test failed: build_index failed'); + process.exit(buildResult.status ?? 1); +} + +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +const userConfig = loadUserConfig(fixtureRoot); +const fieldPostings = path.join( + getIndexDir(fixtureRoot, 'code', userConfig), + 'field_postings.json' +); + +if (!fs.existsSync(fieldPostings)) { + console.error('fielded bm25 test failed: field_postings.json missing'); + process.exit(1); +} + +const result = spawnSync( + process.execPath, + [ + path.join(root, 'search.js'), + 'greet', + '--mode', + 'code', + '--no-ann', + '--json', + '--repo', + fixtureRoot + ], + { env, encoding: 'utf8' } +); + +if (result.status !== 0) { + console.error('fielded bm25 test failed: search returned error'); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 
1); +} + +let payload = null; +try { + payload = JSON.parse(result.stdout || '{}'); +} catch (err) { + console.error('fielded bm25 test failed: invalid JSON output'); + process.exit(1); +} + +const hit = payload?.code?.[0]; +if (!hit) { + console.error('fielded bm25 test failed: no hits'); + process.exit(1); +} +if (hit.scoreType !== 'bm25-fielded') { + console.error(`fielded bm25 test failed: expected bm25-fielded, got ${hit.scoreType}`); + process.exit(1); +} +if (hit.scoreBreakdown?.sparse?.fielded !== true) { + console.error('fielded bm25 test failed: sparse.fielded not true'); + process.exit(1); +} + +console.log('fielded bm25 tests passed'); diff --git a/tests/file-line-guard.js b/tests/file-line-guard.js new file mode 100644 index 000000000..6dfdc7693 --- /dev/null +++ b/tests/file-line-guard.js @@ -0,0 +1,83 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, getMetricsDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'file-line-guard'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const configPath = path.join(repoRoot, '.pairofcleats.json'); +await fsPromises.writeFile( + configPath, + JSON.stringify({ + indexing: { + fileCaps: { default: { maxLines: 2 } }, + fileListSampleSize: 20, + treeSitter: { enabled: false } + } + }, null, 2) +); + +const largePath = path.join(repoRoot, 'too_many_lines.js'); +const smallPath = path.join(repoRoot, 'ok.js'); +await fsPromises.writeFile(largePath, 'line1\nline2\nline3\nline4\n'); +await fsPromises.writeFile(smallPath, 'function ok() { return 1; }\n'); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); +if (buildResult.status !== 0) { + console.error('Failed: build_index'); + process.exit(buildResult.status ?? 
1); +} + +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +const userConfig = loadUserConfig(repoRoot); +const codeDir = getIndexDir(repoRoot, 'code', userConfig); +const fileListsPath = path.join(codeDir, '.filelists.json'); +if (!fs.existsSync(fileListsPath)) { + console.error('Missing .filelists.json'); + process.exit(1); +} +const fileLists = JSON.parse(await fsPromises.readFile(fileListsPath, 'utf8')); +const skippedSample = fileLists?.skipped?.sample; +if (!Array.isArray(skippedSample)) { + console.error('Skipped sample payload is not an array'); + process.exit(1); +} +const oversize = skippedSample.find((entry) => entry?.file && entry.file.endsWith('too_many_lines.js')); +if (!oversize || oversize.reason !== 'oversize') { + console.error('Expected oversize skip entry for too_many_lines.js'); + process.exit(1); +} + +const metricsDir = getMetricsDir(repoRoot, userConfig); +const metricsPath = path.join(metricsDir, 'index-code.json'); +if (!fs.existsSync(metricsPath)) { + console.error('Missing index-code metrics'); + process.exit(1); +} +const metrics = JSON.parse(await fsPromises.readFile(metricsPath, 'utf8')); +const oversizeCount = metrics?.files?.skippedByReason?.oversize || 0; +if (oversizeCount < 1) { + console.error('Expected skippedByReason.oversize to be >= 1'); + process.exit(1); +} + +console.log('File line guard test passed'); diff --git a/tests/file-processor/cached-bundle.test.js b/tests/file-processor/cached-bundle.test.js new file mode 100644 index 000000000..1b65f35f3 --- /dev/null +++ b/tests/file-processor/cached-bundle.test.js @@ -0,0 +1,94 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { reuseCachedBundle } from '../../src/index/build/file-processor/cached-bundle.js'; + +const fail = (message) => { + console.error(message); + process.exit(1); +}; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'file-processor-cached'); +const repoRoot = path.join(tempRoot, 'repo'); +await fs.rm(tempRoot, { recursive: true, force: true }); +await fs.mkdir(repoRoot, { recursive: true }); + +const targetPath = path.join(repoRoot, 'cached.js'); +await fs.writeFile(targetPath, 'export const demo = 1;\n'); +const stat = await fs.stat(targetPath); + +const cachedBundle = { + chunks: [ + { + file: 'cached.js', + ext: '.js', + start: 0, + end: 10, + startLine: 1, + endLine: 1, + kind: 'code', + name: 'demo', + lang: 'javascript', + codeRelations: { + imports: ['dep'], + exports: ['demo'], + calls: ['demo'] + }, + docmeta: { signature: 'demo()' }, + tokens: ['demo'], + seq: ['demo'], + ngrams: [], + chargrams: [] + } + ], + fileRelations: null +}; + +const { result, skip } = reuseCachedBundle({ + abs: targetPath, + relKey: 'cached.js', + fileIndex: 0, + fileStat: stat, + fileHash: 'hash', + ext: '.js', + fileCaps: {}, + cachedBundle, + incrementalState: { + manifest: { + files: { + 'cached.js': { bundle: 'cached.json', hash: 'hash' } + } + } + }, + allImports: { + dep: [{ source: 'cached.js', target: 'dep.js' }] + }, + fileStructural: null, + toolInfo: null, + fileStart: Date.now(), + knownLines: 1, + fileLanguageId: null +}); + +if (skip) { + fail('Expected cached bundle to be reused without skip.'); +} +if (!result) { + fail('Expected cached bundle reuse result.'); +} +if (!result.fileRelations?.importLinks?.length) { + fail('Expected importLinks to be rehydrated from allImports.'); +} +const chunk = result.chunks[0]; +if (!chunk?.metaV2?.chunkId) { + fail('Expected cached chunk to have metaV2 
chunkId.'); +} +if (!Array.isArray(chunk?.codeRelations?.calls)) { + fail('Expected cached chunk to preserve non-file relation fields.'); +} +if (!result.fileMetrics?.cached) { + fail('Expected cached file metrics to set cached=true.'); +} + +console.log('file processor cached bundle tests passed'); diff --git a/tests/file-processor/skip.test.js b/tests/file-processor/skip.test.js new file mode 100644 index 000000000..024ca75fa --- /dev/null +++ b/tests/file-processor/skip.test.js @@ -0,0 +1,61 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { createFileScanner } from '../../src/index/build/file-scan.js'; +import { resolveBinarySkip, resolvePreReadSkip } from '../../src/index/build/file-processor/skip.js'; + +const fail = (message) => { + console.error(message); + process.exit(1); +}; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'file-processor-skip'); +await fs.rm(tempRoot, { recursive: true, force: true }); +await fs.mkdir(tempRoot, { recursive: true }); + +const fileScanner = createFileScanner(); +const runIo = (fn) => fn(); + +const minifiedPath = path.join(tempRoot, 'app.min.js'); +await fs.writeFile(minifiedPath, 'const x=1;'); +const minifiedStat = await fs.stat(minifiedPath); +const minifiedSkip = await resolvePreReadSkip({ + abs: minifiedPath, + fileEntry: { lines: 1, scan: { checkedBinary: true, checkedMinified: true } }, + fileStat: minifiedStat, + ext: '.js', + fileCaps: {}, + fileScanner, + runIo +}); +if (!minifiedSkip || minifiedSkip.reason !== 'minified') { + fail('Expected minified filename to skip with reason=minified.'); +} + +const cappedPath = path.join(tempRoot, 'big.txt'); +await fs.writeFile(cappedPath, 'abcdef'); +const cappedStat = await fs.stat(cappedPath); +const cappedSkip = await resolvePreReadSkip({ + abs: cappedPath, + fileEntry: { lines: 1, scan: { checkedBinary: true, checkedMinified: true } }, + fileStat: cappedStat, + ext: '.txt', + fileCaps: { default: { maxBytes: 1 } }, + fileScanner, + runIo +}); +if (!cappedSkip || cappedSkip.reason !== 'oversize' || cappedSkip.maxBytes !== 1) { + fail('Expected maxBytes to skip with reason=oversize and maxBytes.'); +} + +const binarySkip = await resolveBinarySkip({ + abs: minifiedPath, + fileBuffer: Buffer.from([0, 0, 0, 0, 0]), + fileScanner +}); +if (!binarySkip || binarySkip.reason !== 'binary') { + fail('Expected binary buffer to skip with reason=binary.'); +} + +console.log('file processor skip tests passed'); diff --git a/tests/file-size-guard.js b/tests/file-size-guard.js index 38c52d507..2ffde4722 100644 --- a/tests/file-size-guard.js +++ b/tests/file-size-guard.js @@ -17,7 +17,7 @@ await fsPromises.mkdir(cacheRoot, { recursive: true }); const configPath = path.join(repoRoot, '.pairofcleats.json'); await fsPromises.writeFile( configPath, - JSON.stringify({ indexing: { maxFileBytes: 120 } }, null, 2) + JSON.stringify({ indexing: { maxFileBytes: 120, fileListSampleSize: 10 } }, null, 2) ); const largePath = path.join(repoRoot, 'big.js'); @@ -44,17 +44,18 @@ if (buildResult.status !== 0) { process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; const userConfig = loadUserConfig(repoRoot); const codeDir = getIndexDir(repoRoot, 'code', userConfig); -const skippedPath = path.join(codeDir, '.skippedfiles.json'); -if (!fs.existsSync(skippedPath)) { - console.error('Missing .skippedfiles.json'); +const fileListsPath = path.join(codeDir, '.filelists.json'); +if (!fs.existsSync(fileListsPath)) { + console.error('Missing 
.filelists.json'); process.exit(1); } -const skipped = JSON.parse(await fsPromises.readFile(skippedPath, 'utf8')); -if (!Array.isArray(skipped)) { - console.error('Skipped files payload is not an array'); +const fileLists = JSON.parse(await fsPromises.readFile(fileListsPath, 'utf8')); +const skippedSample = fileLists?.skipped?.sample; +if (!Array.isArray(skippedSample)) { + console.error('Skipped sample payload is not an array'); process.exit(1); } -const oversize = skipped.find((entry) => entry?.file && entry.file.endsWith('big.js')); +const oversize = skippedSample.find((entry) => entry?.file && entry.file.endsWith('big.js')); if (!oversize || oversize.reason !== 'oversize') { console.error('Expected oversize skip entry for big.js'); process.exit(1); diff --git a/tests/filter-index-artifact.js b/tests/filter-index-artifact.js new file mode 100644 index 000000000..18beae329 --- /dev/null +++ b/tests/filter-index-artifact.js @@ -0,0 +1,44 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; +import { readJsonFile } from '../src/shared/artifact-io.js'; +import { loadIndex } from '../src/retrieval/cli-index.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'filter-index-artifact'); +const repoRoot = path.join(tempRoot, 'repo'); +const srcDir = path.join(repoRoot, 'src'); +const configPath = path.join(repoRoot, '.pairofcleats.json'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(srcDir, { recursive: true }); +await fsPromises.writeFile(path.join(srcDir, 'example.js'), 'const a = 1;\n', 'utf8'); +await fsPromises.writeFile( + configPath, + JSON.stringify({ search: { filePrefilter: { chargramN: 4 } } }, null, 2) +); + +const buildResult = spawnSync(process.execPath, [ + path.join(root, 'build_index.js'), + '--stub-embeddings', + '--repo', + repoRoot +], { encoding: 'utf8' }); +if (buildResult.status !== 0) { + console.error(buildResult.stderr || buildResult.stdout || 'build_index failed'); + process.exit(buildResult.status ?? 
1); +} + +const userConfig = loadUserConfig(repoRoot); +const indexDir = getIndexDir(repoRoot, 'code', userConfig); +const filterIndexPath = path.join(indexDir, 'filter_index.json'); +const raw = readJsonFile(filterIndexPath); +assert.equal(raw.fileChargramN, 4, 'expected filter_index.json fileChargramN to match config'); + +const idx = loadIndex(indexDir, { modelIdDefault: 'test', fileChargramN: 2 }); +assert.equal(idx.filterIndex?.fileChargramN, 4, 'expected hydrated filter index to use persisted fileChargramN'); + +console.log('filter index artifact test passed'); diff --git a/tests/filter-index.js b/tests/filter-index.js new file mode 100644 index 000000000..fbfd74b53 --- /dev/null +++ b/tests/filter-index.js @@ -0,0 +1,49 @@ +#!/usr/bin/env node +import { buildFilterIndex } from '../src/retrieval/filter-index.js'; +import { filterChunks } from '../src/retrieval/output.js'; + +const meta = [ + { + id: 0, + ext: '.js', + kind: 'FunctionDeclaration', + last_author: 'Alice', + chunk_authors: ['Alice'], + docmeta: { visibility: 'public' } + }, + { + id: 1, + ext: '.py', + kind: 'ClassDeclaration', + last_author: 'Bob', + chunk_authors: ['Bob', 'Alice'], + docmeta: { visibility: 'private' } + }, + { + id: 2, + ext: '.py', + kind: 'FunctionDeclaration', + last_author: 'Carol', + chunk_authors: ['Carol'], + docmeta: { visibility: 'public' } + } +]; + +const index = buildFilterIndex(meta); + +const expectIds = (filters, expected, label) => { + const results = filterChunks(meta, filters, index).map((entry) => entry.id).sort(); + const expectedSorted = expected.slice().sort(); + const same = results.length === expectedSorted.length + && results.every((id, i) => id === expectedSorted[i]); + if (!same) { + console.error(`${label} failed: expected ${expectedSorted.join(', ')} got ${results.join(', ')}`); + process.exit(1); + } +}; + +expectIds({ ext: '.py', author: 'bob' }, [1], 'author+ext'); +expectIds({ chunkAuthor: 'alice' }, [0, 1], 'chunkAuthor'); +expectIds({ visibility: 'public', type: 'FunctionDeclaration' }, [0, 2], 'visibility+type'); + +console.log('Filter index test passed'); diff --git a/tests/filter-strictness.js b/tests/filter-strictness.js new file mode 100644 index 000000000..6605c51db --- /dev/null +++ b/tests/filter-strictness.js @@ -0,0 +1,68 @@ +#!/usr/bin/env node +import { filterChunks } from '../src/retrieval/output.js'; +import { buildFilterIndex } from '../src/retrieval/filter-index.js'; + +const meta = [ + { + id: 0, + kind: 'FunctionDeclaration', + last_author: 'Alice', + docmeta: { signature: 'foo(bar)', params: ['bar'] }, + codeRelations: { calls: [['foo', 'fetch']], usages: ['config'] }, + file: 'src/a.js' + }, + { + id: 1, + kind: 'FunctionDeclaration', + docmeta: {}, + codeRelations: {}, + file: 'src/b.js' + }, + { + id: 2, + kind: 'ClassDeclaration', + last_author: 'Bob', + docmeta: { signature: 'baz()', params: ['baz'] }, + codeRelations: { calls: [['baz', 'other']], usages: ['other'] }, + file: 'src/c.js' + }, + { + id: 3, + docmeta: {}, + codeRelations: {}, + file: 'docs/readme.md' + }, + { + id: 4, + kind: ['FunctionDeclaration', 'MethodDefinition'], + last_author: ['Carol', 'Dana'], + docmeta: { signature: 'qux()', params: ['qux'] }, + codeRelations: {}, + file: 'src/nested/util.ts' + } +]; +const filterIndex = buildFilterIndex(meta, { fileChargramN: 3 }); + +const expectIds = (filters, expected, label) => { + const result = filterChunks(meta, filters, filterIndex).map((entry) => entry.id).sort(); + const expectedSorted = expected.slice().sort(); + const ok = 
result.length === expectedSorted.length + && result.every((value, idx) => value === expectedSorted[idx]); + if (!ok) { + console.error(`${label} failed: expected [${expectedSorted.join(', ')}], got [${result.join(', ')}]`); + process.exit(1); + } +}; + +expectIds({ signature: 'foo' }, [0], 'signature filter'); +expectIds({ param: 'bar' }, [0], 'param filter'); +expectIds({ calls: 'fetch' }, [0], 'calls filter'); +expectIds({ uses: 'config' }, [0], 'uses filter'); +expectIds({ type: 'FunctionDeclaration' }, [0, 1, 4], 'type filter strict'); +expectIds({ type: 'FunctionDeclaration ClassDeclaration' }, [0, 1, 2, 4], 'type multi filter'); +expectIds({ author: 'Alice' }, [0], 'author filter strict'); +expectIds({ author: 'car' }, [4], 'author filter substring'); +expectIds({ file: 'src/b.js', filePrefilter: { enabled: true, chargramN: 3 } }, [1], 'file filter substring'); +expectIds({ file: '/util\\.ts$/i', filePrefilter: { enabled: true, chargramN: 3 } }, [4], 'file filter regex'); + +console.log('filter strictness test passed'); diff --git a/tests/fixture-eval.js b/tests/fixture-eval.js index d5069143f..e2010f6b2 100644 --- a/tests/fixture-eval.js +++ b/tests/fixture-eval.js @@ -3,14 +3,19 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; import { spawnSync } from 'node:child_process'; -import minimist from 'minimist'; - -const argv = minimist(process.argv.slice(2), { - boolean: ['json', 'write-report'], - string: ['backend', 'out'], - alias: { n: 'top' }, - default: { top: 5, backend: 'memory', json: false, 'write-report': false } -}); +import { createCli } from '../src/shared/cli.js'; + +const argv = createCli({ + scriptName: 'fixture-eval', + options: { + json: { type: 'boolean', default: false }, + 'write-report': { type: 'boolean', default: false }, + backend: { type: 'string', default: 'memory' }, + out: { type: 'string' }, + top: { type: 'number', default: 5 } + }, + aliases: { n: 'top' } +}).parse(); const root = process.cwd(); const fixturesRoot = path.join(root, 'tests', 'fixtures'); diff --git a/tests/fixture-parity.js b/tests/fixture-parity.js index 72576d09a..9b759b840 100644 --- a/tests/fixture-parity.js +++ b/tests/fixture-parity.js @@ -3,23 +3,57 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; import { spawnSync } from 'node:child_process'; +import { createCli } from '../src/shared/cli.js'; const root = process.cwd(); const fixturesRoot = path.join(root, 'tests', 'fixtures'); +const argv = createCli({ + scriptName: 'fixture-parity', + options: { + all: { type: 'boolean', default: false }, + fixture: { type: 'string', default: 'sample' }, + fixtures: { type: 'string', default: '' }, + 'timeout-ms': { type: 'number', default: 300000 } + } +}).parse(); +const parsedTimeout = Number.isFinite(argv['timeout-ms']) ? argv['timeout-ms'] : 300000; +const timeoutMs = Math.max(1000, Math.floor(parsedTimeout)); +const defaultProfile = process.platform === 'win32' ? 
'ci-parity' : ''; +const resolvedProfile = process.env.PAIROFCLEATS_PROFILE || defaultProfile; function resolveFixtures() { + if (argv.fixtures) { + const list = argv.fixtures + .split(/[,\s]+/) + .map((entry) => entry.trim()) + .filter(Boolean); + if (list.length) return list; + } const entries = fs.readdirSync(fixturesRoot, { withFileTypes: true }); - return entries.filter((entry) => entry.isDirectory()).map((entry) => entry.name).sort(); + const allFixtures = entries.filter((entry) => entry.isDirectory()).map((entry) => entry.name).sort(); + if (argv.all) return allFixtures; + return [argv.fixture]; } function run(args, label, cwd, env) { const result = spawnSync(process.execPath, args, { cwd, env, + timeout: timeoutMs, + killSignal: 'SIGTERM', stdio: 'inherit' }); if (result.status !== 0) { - console.error(`Failed: ${label}`); + const details = []; + if (result.error?.code === 'ETIMEDOUT') { + details.push(`timeout after ${timeoutMs}ms`); + } + if (result.signal) details.push(`signal ${result.signal}`); + if (result.error && result.error.code !== 'ETIMEDOUT') { + details.push(result.error.message || String(result.error)); + } + const suffix = details.length ? ` (${details.join(', ')})` : ''; + console.error(`Failed: ${label}${suffix}`); process.exit(result.status ?? 1); } } @@ -32,6 +66,10 @@ if (!fixtures.length) { for (const fixtureName of fixtures) { const fixtureRoot = path.join(fixturesRoot, fixtureName); + if (!fs.existsSync(fixtureRoot)) { + console.error(`Fixture not found: ${fixtureRoot}`); + process.exit(1); + } const cacheRoot = path.join(root, 'tests', '.cache', `parity-${fixtureName}`); console.log(`\nFixture parity: ${fixtureName}`); await fsPromises.rm(cacheRoot, { recursive: true, force: true }); @@ -40,8 +78,12 @@ for (const fixtureName of fixtures) { const env = { ...process.env, PAIROFCLEATS_CACHE_ROOT: cacheRoot, - PAIROFCLEATS_EMBEDDINGS: 'stub' + PAIROFCLEATS_EMBEDDINGS: 'stub', + ...(resolvedProfile ? 
{ PAIROFCLEATS_PROFILE: resolvedProfile } : {}) }; + if (resolvedProfile) { + console.log(`[fixture-parity] profile=${resolvedProfile}`); + } run([path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', fixtureRoot], `build index (${fixtureName})`, fixtureRoot, env); run([path.join(root, 'tools', 'build-sqlite-index.js'), '--repo', fixtureRoot], `build sqlite index (${fixtureName})`, fixtureRoot, env); diff --git a/tests/fixture-smoke.js b/tests/fixture-smoke.js index 3649e9dd1..c8712ca32 100644 --- a/tests/fixture-smoke.js +++ b/tests/fixture-smoke.js @@ -3,17 +3,19 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; import { spawnSync } from 'node:child_process'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; import { getIndexDir, getMetricsDir, loadUserConfig, resolveSqlitePaths } from '../tools/dict-utils.js'; -import { rankMinhash } from '../src/search/rankers.js'; +import { rankMinhash } from '../src/retrieval/rankers.js'; const root = process.cwd(); const fixturesRoot = path.join(root, 'tests', 'fixtures'); -const argv = minimist(process.argv.slice(2), { - boolean: ['all'], - string: ['fixture'], - default: { fixture: 'sample', all: false } -}); +const argv = createCli({ + scriptName: 'fixture-smoke', + options: { + all: { type: 'boolean', default: false }, + fixture: { type: 'string', default: 'sample' } + } +}).parse(); function resolveFixtures() { if (!argv.all) return [argv.fixture]; @@ -101,9 +103,9 @@ if (!fixtures.length) { } for (const fixtureName of fixtures) { - currentFixtureRoot = path.join(fixturesRoot, fixtureName); - if (!fs.existsSync(currentFixtureRoot)) { - console.error(`Fixture not found: ${currentFixtureRoot}`); + const fixtureSourceRoot = path.join(fixturesRoot, fixtureName); + if (!fs.existsSync(fixtureSourceRoot)) { + console.error(`Fixture not found: ${fixtureSourceRoot}`); process.exit(1); } console.log(`\nFixture smoke: ${fixtureName}`); @@ -118,6 +120,21 @@ for (const fixtureName of fixtures) { PAIROFCLEATS_EMBEDDINGS: 'stub' }; process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; + currentFixtureRoot = fixtureSourceRoot; + const generatorPath = path.join(fixtureSourceRoot, 'generate.js'); + if (fs.existsSync(generatorPath)) { + const generatedRoot = path.join(root, 'tests', '.cache', 'fixtures', fixtureName); + const result = spawnSync( + process.execPath, + [generatorPath, '--out', generatedRoot], + { cwd: fixtureSourceRoot, env: currentEnv, stdio: 'inherit' } + ); + if (result.status !== 0) { + console.error(`Fixture generator failed: ${fixtureName}`); + process.exit(result.status ?? 
1); + } + currentFixtureRoot = generatedRoot; + } const repoArgs = ['--repo', currentFixtureRoot]; run([path.join(root, 'build_index.js'), '--stub-embeddings', ...repoArgs], `build index (${fixtureName})`); @@ -132,8 +149,16 @@ for (const fixtureName of fixtures) { const requiredFiles = [ path.join(codeDir, 'chunk_meta.json'), path.join(codeDir, 'token_postings.json'), + path.join(codeDir, 'dense_vectors_uint8.json'), + path.join(codeDir, 'dense_vectors_doc_uint8.json'), + path.join(codeDir, 'dense_vectors_code_uint8.json'), + path.join(codeDir, 'repo_map.json'), path.join(proseDir, 'chunk_meta.json'), path.join(proseDir, 'token_postings.json'), + path.join(proseDir, 'dense_vectors_uint8.json'), + path.join(proseDir, 'dense_vectors_doc_uint8.json'), + path.join(proseDir, 'dense_vectors_code_uint8.json'), + path.join(proseDir, 'repo_map.json'), path.join(metricsDir, 'index-code.json'), path.join(metricsDir, 'index-prose.json'), sqlitePaths.codePath, @@ -147,12 +172,28 @@ for (const fixtureName of fixtures) { } } + const repoMapPath = path.join(codeDir, 'repo_map.json'); + const repoMapRaw = fs.readFileSync(repoMapPath, 'utf8'); + const repoMap = JSON.parse(repoMapRaw); + if (!Array.isArray(repoMap) || !repoMap.length) { + console.error('Fixture repo map missing or empty.'); + process.exit(1); + } + const sampleEntry = repoMap.find((entry) => entry && entry.file && entry.name); + if (!sampleEntry) { + console.error('Fixture repo map missing expected fields.'); + process.exit(1); + } + assertChunkWeights('code', path.join(codeDir, 'chunk_meta.json')); assertChunkWeights('prose', path.join(proseDir, 'chunk_meta.json')); assertMinhashConsistency('code', path.join(codeDir, 'chunk_meta.json'), path.join(codeDir, 'minhash_signatures.json')); assertMinhashConsistency('prose', path.join(proseDir, 'chunk_meta.json'), path.join(proseDir, 'minhash_signatures.json')); - const queries = loadQueries(currentFixtureRoot); + const queriesRoot = fs.existsSync(path.join(currentFixtureRoot, 'queries.txt')) + ? currentFixtureRoot + : fixtureSourceRoot; + const queries = loadQueries(queriesRoot); const backends = ['memory', 'sqlite-fts']; for (const query of queries) { for (const backend of backends) { @@ -344,6 +385,56 @@ for (const fixtureName of fixtures) { } } + if (fixtureName === 'sample') { + const typeScoped = spawnSync( + process.execPath, + [path.join(root, 'search.js'), 'sayHello', '--mode', 'code', '--json', '--backend', 'memory', '--no-ann', '--type', 'MethodDeclaration', ...repoArgs], + { cwd: currentFixtureRoot, env: currentEnv, encoding: 'utf8' } + ); + if (typeScoped.status !== 0) { + console.error('Fixture type filter failed: search error.'); + process.exit(typeScoped.status ?? 1); + } + const typePayload = JSON.parse(typeScoped.stdout || '{}'); + const typeHits = typePayload.code || []; + if (!typeHits.some((hit) => hit.file === 'src/sample.swift' && String(hit.name || '').includes('sayHello'))) { + console.error('Fixture type filter returned no sayHello() hits.'); + process.exit(1); + } + + const signatureScoped = spawnSync( + process.execPath, + [path.join(root, 'search.js'), 'sayHello', '--mode', 'code', '--json', '--backend', 'memory', '--no-ann', '--signature', 'func sayHello', ...repoArgs], + { cwd: currentFixtureRoot, env: currentEnv, encoding: 'utf8' } + ); + if (signatureScoped.status !== 0) { + console.error('Fixture signature filter failed: search error.'); + process.exit(signatureScoped.status ?? 
1); + } + const signaturePayload = JSON.parse(signatureScoped.stdout || '{}'); + const signatureHits = signaturePayload.code || []; + if (!signatureHits.some((hit) => hit.file === 'src/sample.swift' && String(hit.name || '').includes('sayHello'))) { + console.error('Fixture signature filter returned no sayHello() hits.'); + process.exit(1); + } + + const decoratorScoped = spawnSync( + process.execPath, + [path.join(root, 'search.js'), 'sayHello', '--mode', 'code', '--json', '--backend', 'memory', '--no-ann', '--decorator', 'available', ...repoArgs], + { cwd: currentFixtureRoot, env: currentEnv, encoding: 'utf8' } + ); + if (decoratorScoped.status !== 0) { + console.error('Fixture decorator filter failed: search error.'); + process.exit(decoratorScoped.status ?? 1); + } + const decoratorPayload = JSON.parse(decoratorScoped.stdout || '{}'); + const decoratorHits = decoratorPayload.code || []; + if (!decoratorHits.some((hit) => hit.file === 'src/sample.swift' && String(hit.name || '').includes('sayHello'))) { + console.error('Fixture decorator filter returned no sayHello() hits.'); + process.exit(1); + } + } + if (fixtureName === 'sample') { const rustCheck = spawnSync( process.execPath, diff --git a/tests/fixtures/binary/sample.png b/tests/fixtures/binary/sample.png new file mode 100644 index 000000000..c1f499e3b Binary files /dev/null and b/tests/fixtures/binary/sample.png differ diff --git a/tests/fixtures/ctags/tags.jsonl b/tests/fixtures/ctags/tags.jsonl new file mode 100644 index 000000000..5d1d31900 --- /dev/null +++ b/tests/fixtures/ctags/tags.jsonl @@ -0,0 +1,3 @@ +{"_type":"tag","name":"Widget","path":"src/widget.js","kind":"class","line":3,"language":"JavaScript"} +{"_type":"tag","name":"Widget.render","path":"src/widget.js","kind":"method","line":12,"language":"JavaScript","signature":"render()"} +{"_type":"tag","name":"util","path":"src/util.js","kind":"function","line":1,"language":"JavaScript"} diff --git a/tests/fixtures/dict-scan/tokens.txt b/tests/fixtures/dict-scan/tokens.txt new file mode 100644 index 000000000..3db628457 --- /dev/null +++ b/tests/fixtures/dict-scan/tokens.txt @@ -0,0 +1,6 @@ +alphabeta +userprofile +httprequesthandler +tokenScan +gammaDelta +alphazzzbeta diff --git a/tests/fixtures/dict-scan/words.txt b/tests/fixtures/dict-scan/words.txt new file mode 100644 index 000000000..a0d35f127 --- /dev/null +++ b/tests/fixtures/dict-scan/words.txt @@ -0,0 +1,11 @@ +alpha +beta +gamma +delta +user +profile +http +request +handler +token +scan diff --git a/tests/fixtures/encoding/latin1.js b/tests/fixtures/encoding/latin1.js new file mode 100644 index 000000000..e9daecb5c --- /dev/null +++ b/tests/fixtures/encoding/latin1.js @@ -0,0 +1 @@ +const cafe = "caf";\n \ No newline at end of file diff --git a/tests/fixtures/extensions/vec0-slip.tar b/tests/fixtures/extensions/vec0-slip.tar new file mode 100644 index 000000000..f4cdf95a0 Binary files /dev/null and b/tests/fixtures/extensions/vec0-slip.tar differ diff --git a/tests/fixtures/extensions/vec0-slip.zip b/tests/fixtures/extensions/vec0-slip.zip new file mode 100644 index 000000000..3cbec5a34 Binary files /dev/null and b/tests/fixtures/extensions/vec0-slip.zip differ diff --git a/tests/fixtures/formats/.pairofcleats.json b/tests/fixtures/formats/.pairofcleats.json new file mode 100644 index 000000000..3775e9907 --- /dev/null +++ b/tests/fixtures/formats/.pairofcleats.json @@ -0,0 +1,5 @@ +{ + "indexing": { + "yamlChunking": "top-level" + } +} diff --git a/tests/fixtures/formats/src/styles.css 
b/tests/fixtures/formats/src/styles.css new file mode 100644 index 000000000..ed354b5ac --- /dev/null +++ b/tests/fixtures/formats/src/styles.css @@ -0,0 +1,11 @@ +/* Header styles */ +.page-header { + display: flex; + align-items: center; +} + +@media screen and (max-width: 900px) { + .page-header { + flex-direction: column; + } +} diff --git a/tests/fixtures/formats/src/unknown.html b/tests/fixtures/formats/src/unknown.html index b9564a9b0..0a25660b8 100644 --- a/tests/fixtures/formats/src/unknown.html +++ b/tests/fixtures/formats/src/unknown.html @@ -1,5 +1,10 @@

Fallback Chunk
+name: demo
+[build]
+[server]
+# Doc Block
diff --git a/tests/fixtures/graphs/simple/consumer.js b/tests/fixtures/graphs/simple/consumer.js new file mode 100644 index 000000000..9726d931b --- /dev/null +++ b/tests/fixtures/graphs/simple/consumer.js @@ -0,0 +1,6 @@ +import { createGraphWidget, GraphWidget } from './producer.js'; + +export function buildGraphWidget() { + const widget = new GraphWidget(); + return createGraphWidget(); +} diff --git a/tests/fixtures/graphs/simple/producer.js b/tests/fixtures/graphs/simple/producer.js new file mode 100644 index 000000000..4f178e53d --- /dev/null +++ b/tests/fixtures/graphs/simple/producer.js @@ -0,0 +1,9 @@ +export function createGraphWidget() { + return new GraphWidget(); +} + +export class GraphWidget { + constructor() { + this.id = 1; + } +} diff --git a/tests/fixtures/gtags/gtags.txt b/tests/fixtures/gtags/gtags.txt new file mode 100644 index 000000000..11c2b1f87 --- /dev/null +++ b/tests/fixtures/gtags/gtags.txt @@ -0,0 +1,2 @@ +Widget 3 src/widget.js +render 12 src/widget.js diff --git a/tests/fixtures/languages/src/BUILD b/tests/fixtures/languages/src/BUILD new file mode 100644 index 000000000..195586115 --- /dev/null +++ b/tests/fixtures/languages/src/BUILD @@ -0,0 +1,6 @@ +load("//tools:defs.bzl", "widget_lib") + +widget_lib( + name = "widget", + srcs = ["widget.cc"], +) diff --git a/tests/fixtures/languages/src/CMakeLists.txt b/tests/fixtures/languages/src/CMakeLists.txt new file mode 100644 index 000000000..f0c7c7df3 --- /dev/null +++ b/tests/fixtures/languages/src/CMakeLists.txt @@ -0,0 +1,5 @@ +cmake_minimum_required(VERSION 3.20) +project(Widget) +add_library(widget_lib src/widget.cpp) +add_executable(widget_cli src/main.cpp) +include(cmake/Extras.cmake) diff --git a/tests/fixtures/languages/src/Dockerfile b/tests/fixtures/languages/src/Dockerfile new file mode 100644 index 000000000..2e11f4b2b --- /dev/null +++ b/tests/fixtures/languages/src/Dockerfile @@ -0,0 +1,6 @@ +FROM node:18 AS base +WORKDIR /app +COPY package.json package-lock.json ./ +RUN npm ci +COPY . . 
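+# Default start command; this fixture only exercises Dockerfile indexing, so server.js need not exist.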
+CMD ["node", "server.js"] diff --git a/tests/fixtures/languages/src/Gemfile b/tests/fixtures/languages/src/Gemfile new file mode 100644 index 000000000..f701cd13b --- /dev/null +++ b/tests/fixtures/languages/src/Gemfile @@ -0,0 +1,5 @@ +source 'https://rubygems.org' + +def build_widget + puts 'gemfile widget' +end diff --git a/tests/fixtures/languages/src/Makefile b/tests/fixtures/languages/src/Makefile new file mode 100644 index 000000000..acb15d039 --- /dev/null +++ b/tests/fixtures/languages/src/Makefile @@ -0,0 +1,7 @@ +build: + @echo "Building" + +test: + @echo "Testing" + +include tools.mk diff --git a/tests/fixtures/languages/src/WORKSPACE b/tests/fixtures/languages/src/WORKSPACE new file mode 100644 index 000000000..d10eb11a5 --- /dev/null +++ b/tests/fixtures/languages/src/WORKSPACE @@ -0,0 +1,3 @@ +workspace(name = "widgets") + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") diff --git a/tests/fixtures/languages/src/Widget.groovy b/tests/fixtures/languages/src/Widget.groovy new file mode 100644 index 000000000..d5538d5a1 --- /dev/null +++ b/tests/fixtures/languages/src/Widget.groovy @@ -0,0 +1,7 @@ +class Widget { + String name +} + +def buildWidget() { + return new Widget(name: 'ok') +} diff --git a/tests/fixtures/languages/src/Widget.scala b/tests/fixtures/languages/src/Widget.scala new file mode 100644 index 000000000..1cd35d230 --- /dev/null +++ b/tests/fixtures/languages/src/Widget.scala @@ -0,0 +1,5 @@ +case class Widget(name: String) + +object WidgetFactory { + def buildWidget(): Widget = Widget("ok") +} diff --git a/tests/fixtures/languages/src/default.nix b/tests/fixtures/languages/src/default.nix new file mode 100644 index 000000000..1f5e0473d --- /dev/null +++ b/tests/fixtures/languages/src/default.nix @@ -0,0 +1,5 @@ +{ pkgs ? import <nixpkgs> {} }: +let + widget = pkgs.hello; +in + widget diff --git a/tests/fixtures/languages/src/defs.bzl b/tests/fixtures/languages/src/defs.bzl new file mode 100644 index 000000000..62d037e3f --- /dev/null +++ b/tests/fixtures/languages/src/defs.bzl @@ -0,0 +1,6 @@ +def widget_rule(name): + native.genrule( + name = name, + outs = ["widget.txt"], + cmd = "echo hello > $@", + ) diff --git a/tests/fixtures/languages/src/javascript_flow.js b/tests/fixtures/languages/src/javascript_flow.js new file mode 100644 index 000000000..a4974f780 --- /dev/null +++ b/tests/fixtures/languages/src/javascript_flow.js @@ -0,0 +1,15 @@ +/* @flow */ +import type { User } from './types'; +import { parse } from 'flow-parser'; + +export type Id = string; + +export function greet(user: User, id: Id): string { + return `${user.name}-${id}`; +} + +const handler = (name: string): void => { + parse(name); +}; + +export const api = { handler }; diff --git a/tests/fixtures/languages/src/schema.graphql b/tests/fixtures/languages/src/schema.graphql new file mode 100644 index 000000000..6c2f193ec --- /dev/null +++ b/tests/fixtures/languages/src/schema.graphql @@ -0,0 +1,17 @@ +schema { + query: Query +} + +type Query { + widget(id: ID!): Widget +} + +type Widget { + id: ID! + name: String!
+} + +enum WidgetState { + ACTIVE + INACTIVE +} diff --git a/tests/fixtures/languages/src/schema.proto b/tests/fixtures/languages/src/schema.proto new file mode 100644 index 000000000..92b3561da --- /dev/null +++ b/tests/fixtures/languages/src/schema.proto @@ -0,0 +1,24 @@ +syntax = "proto3"; + +package widgets; + +message Widget { + string name = 1; +} + +enum State { + STATE_UNKNOWN = 0; + STATE_ACTIVE = 1; +} + +service WidgetService { + rpc GetWidget (WidgetRequest) returns (WidgetResponse); +} + +message WidgetRequest { + string id = 1; +} + +message WidgetResponse { + Widget widget = 1; +} diff --git a/tests/fixtures/languages/src/types.js b/tests/fixtures/languages/src/types.js new file mode 100644 index 000000000..809f05666 --- /dev/null +++ b/tests/fixtures/languages/src/types.js @@ -0,0 +1,5 @@ +/* @flow */ + +export type User = { + name: string +}; diff --git a/tests/fixtures/languages/src/widget.dart b/tests/fixtures/languages/src/widget.dart new file mode 100644 index 000000000..f77fb9731 --- /dev/null +++ b/tests/fixtures/languages/src/widget.dart @@ -0,0 +1,9 @@ +class Widget { + int size() { + return 42; + } +} + +void buildWidget() { + print('ok'); +} diff --git a/tests/fixtures/languages/src/widget.djhtml b/tests/fixtures/languages/src/widget.djhtml new file mode 100644 index 000000000..4f7e3f3c1 --- /dev/null +++ b/tests/fixtures/languages/src/widget.djhtml @@ -0,0 +1,4 @@ +{% load static %} +{% block body %} + +{% endblock %} diff --git a/tests/fixtures/languages/src/widget.hbs b/tests/fixtures/languages/src/widget.hbs new file mode 100644 index 000000000..77665d115 --- /dev/null +++ b/tests/fixtures/languages/src/widget.hbs @@ -0,0 +1,4 @@ +{{! Widget template }} +{{#widgets}} +
{{name}}
+{{/widgets}} diff --git a/tests/fixtures/languages/src/widget.jinja2 b/tests/fixtures/languages/src/widget.jinja2 new file mode 100644 index 000000000..71baf8435 --- /dev/null +++ b/tests/fixtures/languages/src/widget.jinja2 @@ -0,0 +1,4 @@ +{% extends "base.html" %} +{% block content %} + {{ widget.name }} +{% endblock %} diff --git a/tests/fixtures/languages/src/widget.jl b/tests/fixtures/languages/src/widget.jl new file mode 100644 index 000000000..993b578e2 --- /dev/null +++ b/tests/fixtures/languages/src/widget.jl @@ -0,0 +1,7 @@ +module Widget +export build_widget + +function build_widget(x) + x + 1 +end +end diff --git a/tests/fixtures/languages/src/widget.mustache b/tests/fixtures/languages/src/widget.mustache new file mode 100644 index 000000000..0b16cf341 --- /dev/null +++ b/tests/fixtures/languages/src/widget.mustache @@ -0,0 +1,4 @@ +{{! Widget template }} +{{#widget}} + {{name}} +{{/widget}} diff --git a/tests/fixtures/languages/src/widget.r b/tests/fixtures/languages/src/widget.r new file mode 100644 index 000000000..7256bea32 --- /dev/null +++ b/tests/fixtures/languages/src/widget.r @@ -0,0 +1,3 @@ +build_widget <- function(x) { + x + 1 +} diff --git a/tests/fixtures/languages/src/widget.razor b/tests/fixtures/languages/src/widget.razor new file mode 100644 index 000000000..f634dad99 --- /dev/null +++ b/tests/fixtures/languages/src/widget.razor @@ -0,0 +1,5 @@ +@page "/widget" +@code { + void BuildWidget() { + } +} diff --git a/tests/fixtures/lsif/dump.lsif b/tests/fixtures/lsif/dump.lsif new file mode 100644 index 000000000..a2bcf74db --- /dev/null +++ b/tests/fixtures/lsif/dump.lsif @@ -0,0 +1,7 @@ +{"id":1,"type":"vertex","label":"document","uri":"file:///repo/src/sample.ts","languageId":"typescript"} +{"id":2,"type":"vertex","label":"range","start":{"line":1,"character":0},"end":{"line":1,"character":5},"tag":"foo"} +{"id":3,"type":"vertex","label":"definitionResult"} +{"id":4,"type":"vertex","label":"referenceResult"} +{"id":5,"type":"edge","label":"contains","outV":1,"inVs":[2]} +{"id":6,"type":"edge","label":"item","outV":2,"inVs":[3]} +{"id":7,"type":"edge","label":"item","outV":2,"inVs":[4]} diff --git a/tests/fixtures/lsp/bin/clangd b/tests/fixtures/lsp/bin/clangd new file mode 100644 index 000000000..2e6327493 --- /dev/null +++ b/tests/fixtures/lsp/bin/clangd @@ -0,0 +1,16 @@ +#!/usr/bin/env node +import { spawn } from 'node:child_process'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const args = process.argv.slice(2); +if (args.includes('--version') || args.includes('--help')) { + process.stdout.write('clangd stub\n'); + process.exit(0); +} + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const script = path.join(__dirname, '..', 'stub-lsp-server.js'); +const child = spawn(process.execPath, [script, '--mode', 'clangd'], { stdio: 'inherit' }); +child.on('exit', (code) => process.exit(code ?? 
0)); diff --git a/tests/fixtures/lsp/bin/clangd.cmd b/tests/fixtures/lsp/bin/clangd.cmd new file mode 100644 index 000000000..cacebed48 --- /dev/null +++ b/tests/fixtures/lsp/bin/clangd.cmd @@ -0,0 +1,5 @@ +@echo off +setlocal +if "%1"=="--version" exit /b 0 +if "%1"=="--help" exit /b 0 +node "%~dp0\..\stub-lsp-server.js" --mode clangd diff --git a/tests/fixtures/lsp/bin/pyright-langserver b/tests/fixtures/lsp/bin/pyright-langserver new file mode 100644 index 000000000..e4d0b4828 --- /dev/null +++ b/tests/fixtures/lsp/bin/pyright-langserver @@ -0,0 +1,16 @@ +#!/usr/bin/env node +import { spawn } from 'node:child_process'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const args = process.argv.slice(2); +if (args.includes('--version') || args.includes('--help')) { + process.stdout.write('pyright-langserver stub\n'); + process.exit(0); +} + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const script = path.join(__dirname, '..', 'stub-lsp-server.js'); +const child = spawn(process.execPath, [script, '--mode', 'pyright'], { stdio: 'inherit' }); +child.on('exit', (code) => process.exit(code ?? 0)); diff --git a/tests/fixtures/lsp/bin/pyright-langserver.cmd b/tests/fixtures/lsp/bin/pyright-langserver.cmd new file mode 100644 index 000000000..4ec397478 --- /dev/null +++ b/tests/fixtures/lsp/bin/pyright-langserver.cmd @@ -0,0 +1,5 @@ +@echo off +setlocal +if "%1"=="--version" exit /b 0 +if "%1"=="--help" exit /b 0 +node "%~dp0\..\stub-lsp-server.js" --mode pyright diff --git a/tests/fixtures/lsp/bin/sourcekit-lsp b/tests/fixtures/lsp/bin/sourcekit-lsp new file mode 100644 index 000000000..e22ff0303 --- /dev/null +++ b/tests/fixtures/lsp/bin/sourcekit-lsp @@ -0,0 +1,16 @@ +#!/usr/bin/env node +import { spawn } from 'node:child_process'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const args = process.argv.slice(2); +if (args.includes('--version') || args.includes('--help')) { + process.stdout.write('sourcekit-lsp stub\n'); + process.exit(0); +} + +const __filename = fileURLToPath(import.meta.url); +const __dirname = path.dirname(__filename); +const script = path.join(__dirname, '..', 'stub-lsp-server.js'); +const child = spawn(process.execPath, [script, '--mode', 'sourcekit'], { stdio: 'inherit' }); +child.on('exit', (code) => process.exit(code ?? 0)); diff --git a/tests/fixtures/lsp/bin/sourcekit-lsp.cmd b/tests/fixtures/lsp/bin/sourcekit-lsp.cmd new file mode 100644 index 000000000..ed465d565 --- /dev/null +++ b/tests/fixtures/lsp/bin/sourcekit-lsp.cmd @@ -0,0 +1,5 @@ +@echo off +setlocal +if "%1"=="--version" exit /b 0 +if "%1"=="--help" exit /b 0 +node "%~dp0\..\stub-lsp-server.js" --mode sourcekit diff --git a/tests/fixtures/lsp/stub-lsp-server.js b/tests/fixtures/lsp/stub-lsp-server.js new file mode 100644 index 000000000..16e810da8 --- /dev/null +++ b/tests/fixtures/lsp/stub-lsp-server.js @@ -0,0 +1,161 @@ +#!/usr/bin/env node +import { createFramedJsonRpcParser, writeFramedJsonRpc } from '../../../src/shared/jsonrpc.js'; + +const args = process.argv.slice(2); +const modeIdx = args.indexOf('--mode'); +const mode = modeIdx !== -1 && args[modeIdx + 1] ? 
args[modeIdx + 1] : 'clangd'; +const exitOnShutdown = args.includes('--exit-on-shutdown'); + +const symbolsByMode = { + clangd: { + name: 'add', + detail: 'int add(int a, int b)', + kind: 12 + }, + sourcekit: { + name: 'greet', + detail: 'func greet(name: String, count: Int) -> String', + kind: 12 + }, + pyright: { + name: 'greet', + detail: 'def greet(name: str) -> str', + kind: 12 + } +}; + +const config = symbolsByMode[mode] || symbolsByMode.clangd; +const pyrightDiagnostic = { + message: 'Stub pyright diagnostic', + severity: 2, + code: 'PYRIGHT_STUB', + source: 'pyright', + range: { + start: { line: 0, character: 0 }, + end: { line: 0, character: 1 } + } +}; +const documents = new Map(); + +const send = (payload) => { + const pending = writeFramedJsonRpc(process.stdout, payload); + if (pending && typeof pending.catch === 'function') { + pending.catch(() => {}); + } +}; + +const lineColForIndex = (text, index) => { + const before = text.slice(0, Math.max(0, index)); + const lines = before.split(/\r?\n/); + const line = Math.max(0, lines.length - 1); + const character = lines.length ? lines[lines.length - 1].length : 0; + return { line, character }; +}; + +const buildSymbol = (text) => { + if (mode === 'pyright') { + const match = text.match(/^\s*(?:async\s+)?def\s+([A-Za-z_][\w]*)\s*\(([^)]*)\)\s*(?:->\s*([^:]+))?\s*:/m); + if (match) { + const name = match[1]; + const params = match[2] || ''; + const returnType = match[3] ? ` -> ${match[3].trim()}` : ''; + const detail = `def ${name}(${params})${returnType}`.trim(); + const idx = text.indexOf(name); + const start = lineColForIndex(text || '', idx >= 0 ? idx : 0); + const end = lineColForIndex(text || '', idx >= 0 ? idx + name.length : 1); + return { + name, + kind: config.kind, + detail, + range: { start, end }, + selectionRange: { start, end } + }; + } + } + const name = config.name; + const detail = config.detail; + const idx = text ? text.indexOf(name) : -1; + const start = lineColForIndex(text || '', idx >= 0 ? idx : 0); + const end = lineColForIndex(text || '', idx >= 0 ? idx + name.length : 1); + return { + name, + kind: config.kind, + detail, + range: { start, end }, + selectionRange: { start, end } + }; +}; + +const respond = (id, result) => send({ jsonrpc: '2.0', id, result }); +const respondError = (id, message) => send({ jsonrpc: '2.0', id, error: { code: -32601, message } }); + +const handleRequest = (message) => { + const { id, method, params } = message; + if (method === 'initialize') { + respond(id, { + capabilities: { + documentSymbolProvider: true, + hoverProvider: true + } + }); + return; + } + if (method === 'shutdown') { + respond(id, null); + if (exitOnShutdown) { + setTimeout(() => process.exit(0), 0); + } + return; + } + if (method === 'textDocument/documentSymbol') { + const uri = params?.textDocument?.uri; + const text = documents.get(uri) || ''; + const symbol = buildSymbol(text); + respond(id, symbol ? 
[symbol] : []); + return; + } + if (method === 'textDocument/hover') { + respond(id, { + contents: { kind: 'plaintext', value: config.detail } + }); + return; + } + respondError(id, `Method not supported: ${method}`); +}; + +const handleNotification = (message) => { + if (!message?.method) return; + if (message.method === 'textDocument/didOpen') { + const uri = message.params?.textDocument?.uri; + const text = message.params?.textDocument?.text || ''; + if (uri) documents.set(uri, text); + if (uri && mode === 'pyright') { + send({ + jsonrpc: '2.0', + method: 'textDocument/publishDiagnostics', + params: { uri, diagnostics: [pyrightDiagnostic] } + }); + } + } else if (message.method === 'textDocument/didClose') { + const uri = message.params?.textDocument?.uri; + if (uri) documents.delete(uri); + } else if (message.method === 'exit') { + process.exit(0); + } +}; + +const parser = createFramedJsonRpcParser({ + onMessage: (message) => { + if (!message || typeof message !== 'object') return; + if (Object.prototype.hasOwnProperty.call(message, 'id') && message.method) { + handleRequest(message); + return; + } + handleNotification(message); + }, + onError: (err) => { + process.stderr.write(`stub lsp parse error: ${err.message}\n`); + } +}); + +process.stdin.on('data', (chunk) => parser.push(chunk)); diff --git a/tests/fixtures/mcp/schema-snapshot.json b/tests/fixtures/mcp/schema-snapshot.json new file mode 100644 index 000000000..bff704c77 --- /dev/null +++ b/tests/fixtures/mcp/schema-snapshot.json @@ -0,0 +1,474 @@ +{ + "tools": [ + { + "name": "index_status", + "required": [], + "properties": [ + "repoPath" + ] + }, + { + "name": "config_status", + "required": [], + "properties": [ + "repoPath" + ] + }, + { + "name": "build_index", + "required": [], + "properties": [ + "artifactsDir", + "incremental", + "mode", + "repoPath", + "sqlite", + "stubEmbeddings", + "useArtifacts" + ] + }, + { + "name": "search", + "required": [ + "query" + ], + "properties": [ + "alias", + "ann", + "async", + "author", + "awaits", + "backend", + "branch", + "branchesMin", + "breaksMin", + "calls", + "case", + "caseFile", + "caseTokens", + "chunkAuthor", + "churnMin", + "context", + "continuesMin", + "decorator", + "ext", + "extends", + "file", + "generator", + "import", + "inferredType", + "lang", + "lint", + "loopsMin", + "meta", + "metaJson", + "mode", + "modifiedAfter", + "modifiedSince", + "mutates", + "output", + "param", + "path", + "query", + "reads", + "repoPath", + "returnType", + "returns", + "risk", + "riskCategory", + "riskFlow", + "riskSink", + "riskSource", + "riskTag", + "signature", + "throws", + "top", + "type", + "uses", + "visibility", + "writes" + ] + }, + { + "name": "triage_ingest", + "required": [ + "inputPath", + "source" + ], + "properties": [ + "buildIndex", + "incremental", + "inputPath", + "meta", + "repoPath", + "source", + "stubEmbeddings" + ] + }, + { + "name": "triage_decision", + "required": [ + "finding", + "status" + ], + "properties": [ + "codes", + "evidence", + "expires", + "finding", + "justification", + "meta", + "repoPath", + "reviewer", + "status" + ] + }, + { + "name": "triage_context_pack", + "required": [ + "recordId" + ], + "properties": [ + "ann", + "outPath", + "recordId", + "repoPath", + "stubEmbeddings" + ] + }, + { + "name": "download_models", + "required": [], + "properties": [ + "cacheDir", + "model", + "repoPath" + ] + }, + { + "name": "download_dictionaries", + "required": [], + "properties": [ + "dir", + "force", + "lang", + "repoPath", + "update", + "url" + ] + 
}, + { + "name": "download_extensions", + "required": [], + "properties": [ + "arch", + "dir", + "force", + "out", + "platform", + "provider", + "repoPath", + "update", + "url" + ] + }, + { + "name": "verify_extensions", + "required": [], + "properties": [ + "annMode", + "arch", + "column", + "dir", + "encoding", + "load", + "module", + "options", + "path", + "platform", + "provider", + "repoPath", + "table" + ] + }, + { + "name": "build_sqlite_index", + "required": [], + "properties": [ + "codeDir", + "compact", + "incremental", + "mode", + "out", + "proseDir", + "repoPath" + ] + }, + { + "name": "compact_sqlite_index", + "required": [], + "properties": [ + "dryRun", + "keepBackup", + "mode", + "repoPath" + ] + }, + { + "name": "cache_gc", + "required": [], + "properties": [ + "dryRun", + "maxAgeDays", + "maxBytes", + "maxGb", + "repoPath" + ] + }, + { + "name": "clean_artifacts", + "required": [], + "properties": [ + "all", + "dryRun", + "repoPath" + ] + }, + { + "name": "bootstrap", + "required": [], + "properties": [ + "incremental", + "repoPath", + "skipArtifacts", + "skipDicts", + "skipIndex", + "skipInstall", + "skipTooling", + "withSqlite" + ] + }, + { + "name": "report_artifacts", + "required": [], + "properties": [ + "repoPath" + ] + } + ], + "responses": { + "index_status": { + "cacheRoot": "", + "dictionaries": { + "dir": "", + "enabled": "", + "files": [], + "includeSlang": "" + }, + "git": { + "isRepo": "", + "warning": "" + }, + "incremental": { + "dir": "", + "exists": "" + }, + "index": { + "code": { + "chunkMeta": { + "bytes": "", + "exists": "", + "mtime": "" + }, + "dir": "", + "tokenPostings": { + "bytes": "", + "exists": "", + "mtime": "" + } + }, + "prose": { + "chunkMeta": { + "bytes": "", + "exists": "", + "mtime": "" + }, + "dir": "", + "tokenPostings": { + "bytes": "", + "exists": "", + "mtime": "" + } + }, + "records": { + "chunkMeta": { + "bytes": "", + "exists": "", + "mtime": "" + }, + "dir": "", + "tokenPostings": { + "bytes": "", + "exists": "", + "mtime": "" + } + } + }, + "metrics": { + "dir": "", + "indexCode": { + "bytes": "", + "exists": "", + "mtime": "" + }, + "indexProse": { + "bytes": "", + "exists": "", + "mtime": "" + }, + "indexRecords": { + "bytes": "", + "exists": "", + "mtime": "" + }, + "queryCache": { + "bytes": "", + "exists": "", + "mtime": "" + } + }, + "models": { + "available": "", + "dir": "", + "hint": "", + "model": "" + }, + "repoCacheRoot": "", + "repoId": "", + "repoPath": "", + "sqlite": { + "code": { + "bytes": "", + "exists": "", + "mtime": "", + "path": "" + }, + "legacy": "", + "prose": { + "bytes": "", + "exists": "", + "mtime": "", + "path": "" + } + } + }, + "config_status": { + "cache": { + "cacheRootExists": "", + "dictionaries": [], + "modelAvailable": "", + "repoCacheExists": "", + "sqlite": { + "codeExists": "", + "proseExists": "" + }, + "vectorExtension": { + "available": "", + "enabled": "", + "path": "" + } + }, + "config": { + "cacheRoot": "", + "dictionary": { + "dir": "", + "dpMaxTokenLength": "", + "dpMaxTokenLengthByFileCount": [ + { + "dpMaxTokenLength": "", + "maxFiles": "" + }, + { + "dpMaxTokenLength": "", + "maxFiles": "" + }, + { + "dpMaxTokenLength": "", + "maxFiles": "" + } + ], + "enableRepoDictionary": "", + "files": [], + "includeSlang": "", + "languages": [ + "" + ], + "segmentation": "", + "slangDirs": [], + "slangFiles": [] + }, + "indexing": { + "astDataflow": "", + "complexity": "", + "controlFlow": "", + "gitBlame": "", + "importScan": "", + "lint": "", + "postings": { + "chargramMaxN": "", 
+ "chargramMaxTokenLength": "", + "chargramMinN": "", + "chargramSource": "", + "enableChargrams": "", + "enablePhraseNgrams": "", + "fielded": "", + "phraseMaxN": "", + "phraseMinN": "" + }, + "pythonAst": { + "enabled": "" + }, + "riskAnalysis": "", + "riskAnalysisCrossFile": "", + "treeSitter": { + "enabled": "" + }, + "typeInference": "", + "typeInferenceCrossFile": "" + }, + "models": { + "dir": "", + "id": "" + }, + "repoCacheRoot": "", + "search": { + "annDefault": "", + "denseVectorMode": "" + }, + "sqlite": { + "annMode": "", + "codeDbPath": "", + "proseDbPath": "", + "use": "" + }, + "tooling": {} + }, + "repoId": "", + "repoPath": "", + "warnings": [ + { + "code": "", + "message": "" + }, + { + "code": "", + "message": "" + }, + { + "code": "", + "message": "" + } + ] + } + } +} diff --git a/tests/fixtures/medium/README.md b/tests/fixtures/medium/README.md new file mode 100644 index 000000000..fd4c8b472 --- /dev/null +++ b/tests/fixtures/medium/README.md @@ -0,0 +1,15 @@ +# Medium fixture + +This fixture is generated on demand to avoid committing thousands of files. + +- Generator: `tests/fixtures/medium/generate.js` +- Default output: `tests/.cache/fixtures/medium` +- Default size: 5,000 files (adjust with `--count`) + +Example: + +```bash +node tests/fixtures/medium/generate.js --out tests/.cache/fixtures/medium --count 8000 +``` + +`tests/fixture-smoke.js` will auto-generate this fixture when it detects `generate.js`. diff --git a/tests/fixtures/medium/generate.js b/tests/fixtures/medium/generate.js new file mode 100644 index 000000000..79962a420 --- /dev/null +++ b/tests/fixtures/medium/generate.js @@ -0,0 +1,173 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import crypto from 'node:crypto'; +import { fileURLToPath } from 'node:url'; +import { createCli } from '../../../src/shared/cli.js'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const repoRoot = path.resolve(__dirname, '..', '..', '..'); +const argv = createCli({ + scriptName: 'fixture-medium-generate', + options: { + out: { type: 'string' }, + count: { type: 'number', default: 5000 }, + seed: { type: 'string', default: 'medium-fixture' }, + clean: { type: 'boolean', default: false } + } +}).parse(); + +const outRoot = argv.out + ? path.resolve(argv.out) + : path.join(repoRoot, 'tests', '.cache', 'fixtures', 'medium'); +const fileCount = Number.isFinite(argv.count) + ? 
Math.max(1, Math.floor(argv.count))
+  : 5000;
+const seed = String(argv.seed || 'medium-fixture');
+
+const hashSeed = (value) => {
+  let h = 2166136261;
+  for (let i = 0; i < value.length; i += 1) {
+    h ^= value.charCodeAt(i);
+    h = Math.imul(h, 16777619);
+  }
+  return h >>> 0;
+};
+
+const createRng = (value) => {
+  let state = hashSeed(value);
+  return () => {
+    state = (Math.imul(state, 1664525) + 1013904223) >>> 0;
+    return state / 0x100000000;
+  };
+};
+
+const rng = createRng(seed);
+const words = ['alpha', 'bravo', 'charlie', 'delta', 'echo', 'foxtrot', 'golf', 'hotel'];
+
+const templates = [
+  {
+    dir: path.join('src', 'js'),
+    ext: 'js',
+    render: (i) => {
+      const word = words[i % words.length];
+      const n = Math.floor(rng() * 1000);
+      return [
+        `export function fn_${i}(input) {`,
+        `  const tag = '${word}-${i}';`,
+        `  return input + ${n};`,
+        '}',
+        ''
+      ].join('\n');
+    }
+  },
+  {
+    dir: path.join('src', 'ts'),
+    ext: 'ts',
+    render: (i) => {
+      const n = Math.floor(rng() * 1000);
+      return [
+        `export interface Widget${i} {`,
+        '  id: number;',
+        '  name: string;',
+        '}',
+        `export const widget${i}: Widget${i} = { id: ${n}, name: 'widget-${i}' };`,
+        ''
+      ].join('\n');
+    }
+  },
+  {
+    dir: path.join('src', 'py'),
+    ext: 'py',
+    render: (i) => {
+      const n = Math.floor(rng() * 1000);
+      return [
+        `def handler_${i}(value: int) -> int:`,
+        `    return value + ${n}`,
+        ''
+      ].join('\n');
+    }
+  },
+  {
+    dir: path.join('docs'),
+    ext: 'md',
+    render: (i) => {
+      const word = words[(i + 3) % words.length];
+      return [
+        `# Note ${i}`,
+        '',
+        `This is the ${word} fixture entry for file ${i}.`,
+        '',
+        '```js',
+        `export const sample = ${i};`,
+        '```',
+        ''
+      ].join('\n');
+    }
+  },
+  {
+    dir: path.join('web'),
+    ext: 'html',
+    render: (i) => {
+      const n = Math.floor(rng() * 1000);
+      return [
+        '<!doctype html>',
+        '<html>',
+        '  <head>',
+        '    <meta charset="utf-8">',
+        '    <title></title>',
+        '  </head>',
+        '  <body>',
+        `    <div data-value="${n}">Item ${i}</div>`,
+        '  </body>',
+        '</html>',
+        ''
+      ].join('\n');
+    }
+  }
+];
+
+if (argv.clean) {
+  await fsPromises.rm(outRoot, { recursive: true, force: true });
+}
+
+await fsPromises.mkdir(outRoot, { recursive: true });
+
+const manifest = {
+  seed,
+  fileCount,
+  generatedAt: new Date().toISOString(),
+  filesByExt: {},
+  totalBytes: 0,
+  contentHash: ''
+};
+const hash = crypto.createHash('sha1');
+
+for (let i = 0; i < fileCount; i += 1) {
+  const template = templates[i % templates.length];
+  const group = Math.floor(i / 500);
+  const dir = path.join(outRoot, template.dir, `group-${group}`);
+  await fsPromises.mkdir(dir, { recursive: true });
+  const fileName = `file-${i}.${template.ext}`;
+  const content = template.render(i);
+  const relPath = path.join(template.dir, `group-${group}`, fileName);
+  await fsPromises.writeFile(path.join(outRoot, relPath), content, 'utf8');
+  manifest.filesByExt[template.ext] = (manifest.filesByExt[template.ext] || 0) + 1;
+  manifest.totalBytes += Buffer.byteLength(content, 'utf8');
+  hash.update(relPath.replace(/\\/g, '/'));
+  hash.update('\n');
+  hash.update(content);
+  hash.update('\n');
+}
+
+manifest.contentHash = `sha1:${hash.digest('hex')}`;
+await fsPromises.writeFile(
+  path.join(outRoot, 'manifest.json'),
+  `${JSON.stringify(manifest, null, 2)}\n`,
+  'utf8'
+);
+
+console.log(`Generated medium fixture at ${outRoot} (${fileCount} files).`);
diff --git a/tests/fixtures/scip/index.json b/tests/fixtures/scip/index.json
new file mode 100644
index 000000000..c640a1f8f
--- /dev/null
+++ b/tests/fixtures/scip/index.json
@@ -0,0 +1,28 @@
+{
+  "documents": [
+    {
+      "relativePath": "src/example.js",
+      "language": "JavaScript",
+      "symbols": [
+        {
+          "symbol": "local 1",
+          "kind": "Function",
+          "displayName": "doThing",
+          "signature": "doThing()"
+        }
+      ],
+      "occurrences": [
+        {
+          "range": [1, 0, 1, 7],
+          "symbol": "local 1",
+          "symbolRoles": 1
+        },
+        {
+          "range": [3, 2, 3, 9],
+          "symbol": "local 1",
+          "symbolRoles": 2
+        }
+      ]
+    }
+  ]
+}
diff --git a/tests/fixtures/segments/README.md b/tests/fixtures/segments/README.md
new file mode 100644
index 000000000..86ab279d0
--- /dev/null
+++ b/tests/fixtures/segments/README.md
@@ -0,0 +1,3 @@
+# Segments fixture
+
+Fixtures for segmented document + comment extraction coverage.
diff --git a/tests/fixtures/segments/docs/guide.md b/tests/fixtures/segments/docs/guide.md
new file mode 100644
index 000000000..cd40c5dbc
--- /dev/null
+++ b/tests/fixtures/segments/docs/guide.md
@@ -0,0 +1,21 @@
+---
+title: Segment Guide
+tags:
+  - docs
+---
+
+# Segment Guide
+
+This guide has `short` and `inline_code_span` plus `ok_span_long`.
+
+```js
+const answer = 42;
+```
+
+```json
+{
+  "name": "widget"
+}
+```
+
+More prose here.
diff --git a/tests/fixtures/segments/src/comments.js b/tests/fixtures/segments/src/comments.js
new file mode 100644
index 000000000..97d5726a4
--- /dev/null
+++ b/tests/fixtures/segments/src/comments.js
@@ -0,0 +1,20 @@
+/**
+ * Widget config.
+ *
+ * ```json
+ * {
+ *   "name": "widget",
+ *   "enabled": true
+ * }
+ * ```
+ */
+export function buildWidget() {
+  // short
+  // Longer inline comment that should be indexed by prose tokenization.
+  /* Block comment that should be indexed as well. */
+  // generated by lint
+  console.log('ok');
+  return true;
+}
+
+/* Copyright 2025 Example */
diff --git a/tests/fixtures/segments/src/component.vue b/tests/fixtures/segments/src/component.vue
new file mode 100644
index 000000000..ece3e08e0
--- /dev/null
+++ b/tests/fixtures/segments/src/component.vue
@@ -0,0 +1,11 @@
+<template>
+  <div class="widget">{{ label }}</div>
+</template>
+
+<script>
+export default {
+  data() {
+    return { label: 'Vue' };
+  }
+};
+</script>
diff --git a/tests/fixtures/segments/src/page.astro b/tests/fixtures/segments/src/page.astro
new file mode 100644
index 000000000..20bf98c25
--- /dev/null
+++ b/tests/fixtures/segments/src/page.astro
@@ -0,0 +1,12 @@
+---
+const label = 'Astro';
+---
+<!doctype html>
+<html>
+  <head>
+    <meta charset="utf-8" />
+  </head>
+  <body>
+    <h1>{label}</h1>
+  </body>
+</html>
diff --git a/tests/fixtures/segments/src/widget.svelte b/tests/fixtures/segments/src/widget.svelte
new file mode 100644
index 000000000..24cb1538c
--- /dev/null
+++ b/tests/fixtures/segments/src/widget.svelte
@@ -0,0 +1,9 @@
+<script>
+  const label = 'Svelte';
+</script>
+
+<style>
+  .widget { font-weight: bold; }
+</style>
+
+<div class="widget">{label}</div>
diff --git a/tests/fixtures/structural/bin/comby b/tests/fixtures/structural/bin/comby
new file mode 100644
index 000000000..ff0ee986f
--- /dev/null
+++ b/tests/fixtures/structural/bin/comby
@@ -0,0 +1,19 @@
+#!/usr/bin/env node
+const args = process.argv.slice(2);
+if (args.includes('--version') || args.includes('--help')) {
+  process.stdout.write('comby stub\n');
+  process.exit(0);
+}
+const payload = {
+  uri: 'docs/notes.md',
+  matches: [
+    {
+      matched: 'TODO: update',
+      range: {
+        start: { line: 1, col: 1 },
+        end: { line: 1, col: 12 }
+      }
+    }
+  ]
+};
+process.stdout.write(`${JSON.stringify(payload)}\n`);
diff --git a/tests/fixtures/structural/bin/semgrep b/tests/fixtures/structural/bin/semgrep
new file mode 100644
index 000000000..4859e69df
--- /dev/null
+++ b/tests/fixtures/structural/bin/semgrep
@@ -0,0 +1,23 @@
+#!/usr/bin/env node
+const args = process.argv.slice(2);
+if (args.includes('--version') || args.includes('--help')) {
+  process.stdout.write('semgrep stub\n');
+  process.exit(0);
+}
+const payload = {
+  results: [
+    {
+      check_id: 'semgrep.stub',
+      path: 'src/example.js',
+      start: { line: 2, col: 3 },
+      end: { line: 2, col: 10 },
+      extra: {
+        message: 'stub semgrep match',
+        severity: 'WARNING',
+        lines: 'eval("x")',
+        metadata: { tags: ['security'] }
+      }
+    }
+  ]
+};
+process.stdout.write(`${JSON.stringify(payload)}\n`);
diff --git a/tests/fixtures/structural/bin/sg b/tests/fixtures/structural/bin/sg
new file mode 100644
index 000000000..e59a86f09
--- /dev/null
+++ b/tests/fixtures/structural/bin/sg
@@ -0,0 +1,21 @@
+#!/usr/bin/env node
+const args = process.argv.slice(2);
+if (args.includes('--version') || args.includes('--help')) {
+  process.stdout.write('ast-grep stub\n');
+  process.exit(0);
+}
+const payload = {
+  ruleId: 'astgrep.stub',
+  file: 'src/example.ts',
+  matches: [
+    {
+      message: 'stub ast-grep match',
+      range: {
+        start: { line: 4, column: 1 },
+        end: { line: 4, column: 8 }
+      },
+      text: 'eval(x)'
+    }
+  ]
+};
+process.stdout.write(`${JSON.stringify(payload)}\n`);
diff --git a/tests/fixtures/tree-sitter/clike.c b/tests/fixtures/tree-sitter/clike.c
new file mode 100644
index 000000000..f06c63c96
--- /dev/null
+++ b/tests/fixtures/tree-sitter/clike.c
@@ -0,0 +1,5 @@
+struct Widget { int id; };
+
+int greet(int name) {
+  return name;
+}
diff --git a/tests/fixtures/tree-sitter/cpp.cpp b/tests/fixtures/tree-sitter/cpp.cpp
new file mode 100644
index 000000000..fcbddb061
--- /dev/null
+++ b/tests/fixtures/tree-sitter/cpp.cpp
@@ -0,0 +1,4 @@
+class Widget {
+public:
+  int greet(int name) { return name; }
+};
diff --git a/tests/fixtures/tree-sitter/csharp.cs b/tests/fixtures/tree-sitter/csharp.cs
new file mode 100644
index 000000000..71e8d26fc
--- /dev/null
+++ b/tests/fixtures/tree-sitter/csharp.cs
@@ -0,0 +1,7 @@
+namespace Demo {
+  class Widget {
+    string Greet(string name) {
+      return name;
+    }
+  }
+}
diff --git a/tests/fixtures/tree-sitter/go.go b/tests/fixtures/tree-sitter/go.go
new file mode 100644
index 000000000..1bf52bd1b
--- /dev/null
+++ b/tests/fixtures/tree-sitter/go.go
@@ -0,0 +1,5 @@
+type Widget struct {}
+
+func (w Widget) Greet(name string) string {
+  return name
+}
diff --git a/tests/fixtures/tree-sitter/java.java b/tests/fixtures/tree-sitter/java.java
new file mode 100644
index 000000000..a45ac794c
--- /dev/null
+++ b/tests/fixtures/tree-sitter/java.java
@@ -0,0 +1,5 @@
+class Widget {
+  String greet(String name) {
+    return name;
+  }
+}
diff --git a/tests/fixtures/tree-sitter/kotlin.kt b/tests/fixtures/tree-sitter/kotlin.kt
new file mode 100644
index 000000000..843ec5710 --- /dev/null +++ b/tests/fixtures/tree-sitter/kotlin.kt @@ -0,0 +1,5 @@ +class Widget { + fun greet(name: String): String { + return name + } +} diff --git a/tests/fixtures/tree-sitter/objc.m b/tests/fixtures/tree-sitter/objc.m new file mode 100644 index 000000000..3125bca79 --- /dev/null +++ b/tests/fixtures/tree-sitter/objc.m @@ -0,0 +1,8 @@ +@interface Widget : NSObject +- (void)greet:(NSString *)name; +@end + +@implementation Widget +- (void)greet:(NSString *)name { +} +@end diff --git a/tests/fixtures/tree-sitter/rust.rs b/tests/fixtures/tree-sitter/rust.rs new file mode 100644 index 000000000..2395833bb --- /dev/null +++ b/tests/fixtures/tree-sitter/rust.rs @@ -0,0 +1,7 @@ +struct Widget {} + +impl Widget { + fn greet(&self, name: &str) -> &str { + name + } +} diff --git a/tests/fixtures/tree-sitter/swift.swift b/tests/fixtures/tree-sitter/swift.swift new file mode 100644 index 000000000..3c37acd09 --- /dev/null +++ b/tests/fixtures/tree-sitter/swift.swift @@ -0,0 +1,7 @@ +import Foundation + +class Widget { + func greet(name: String) -> String { + return name + } +} diff --git a/tests/format-fidelity.js b/tests/format-fidelity.js index 9918536e8..d0f6d054b 100644 --- a/tests/format-fidelity.js +++ b/tests/format-fidelity.js @@ -35,11 +35,22 @@ const codeDir = getIndexDir(fixtureRoot, 'code', userConfig); const proseDir = getIndexDir(fixtureRoot, 'prose', userConfig); const codeMeta = JSON.parse(fs.readFileSync(path.join(codeDir, 'chunk_meta.json'), 'utf8')); const proseMeta = JSON.parse(fs.readFileSync(path.join(proseDir, 'chunk_meta.json'), 'utf8')); +const loadFileMap = (dir) => { + const metaPath = path.join(dir, 'file_meta.json'); + if (!fs.existsSync(metaPath)) return new Map(); + const entries = JSON.parse(fs.readFileSync(metaPath, 'utf8')); + return new Map( + (Array.isArray(entries) ? 
entries : []).map((entry) => [entry.id, entry.file]) + ); +}; +const codeFileById = loadFileMap(codeDir); +const proseFileById = loadFileMap(proseDir); -function findChunk(meta, match) { +function findChunk(meta, match, fileById) { return meta.find((chunk) => { - if (!chunk || !chunk.file) return false; - if (match.file && chunk.file !== match.file) return false; + const file = chunk?.file || fileById.get(chunk?.fileId) || null; + if (!chunk || !file) return false; + if (match.file && file !== match.file) return false; if (match.kind && chunk.kind !== match.kind) return false; if (match.nameIncludes && !String(chunk.name || '').includes(match.nameIncludes)) return false; return true; @@ -48,38 +59,53 @@ function findChunk(meta, match) { const failures = []; -if (!findChunk(codeMeta, { file: 'src/config.json', nameIncludes: 'database' })) { +if (!findChunk(codeMeta, { file: 'src/config.json', nameIncludes: 'database' }, codeFileById)) { failures.push('Missing JSON chunk for database.'); } -if (!findChunk(codeMeta, { file: 'src/config.toml', nameIncludes: 'database' })) { +if (!findChunk(codeMeta, { file: 'src/config.toml', nameIncludes: 'database' }, codeFileById)) { failures.push('Missing TOML chunk for database.'); } -if (!findChunk(codeMeta, { file: 'src/config.ini', nameIncludes: 'server' })) { +if (!findChunk(codeMeta, { file: 'src/config.ini', nameIncludes: 'server' }, codeFileById)) { failures.push('Missing INI chunk for server.'); } -if (!findChunk(codeMeta, { file: 'src/config.xml', nameIncludes: 'database' })) { +if (!findChunk(codeMeta, { file: 'src/config.xml', nameIncludes: 'database' }, codeFileById)) { failures.push('Missing XML chunk for database.'); } -if (!findChunk(codeMeta, { file: 'src/Dockerfile', nameIncludes: 'FROM' })) { +if (!findChunk(codeMeta, { file: 'src/Dockerfile', nameIncludes: 'FROM' }, codeFileById)) { failures.push('Missing Dockerfile chunk for FROM.'); } -if (!findChunk(codeMeta, { file: 'src/Makefile', nameIncludes: 'build' })) { +if (!findChunk(codeMeta, { file: 'src/Makefile', nameIncludes: 'build' }, codeFileById)) { failures.push('Missing Makefile chunk for build target.'); } -if (!findChunk(codeMeta, { file: 'src/config.yaml', nameIncludes: 'database' })) { +if (!findChunk(codeMeta, { file: 'src/config.yaml', nameIncludes: 'database' }, codeFileById)) { failures.push('Missing YAML chunk for database.'); } -if (!findChunk(codeMeta, { file: '.github/workflows/ci.yml', nameIncludes: 'build' })) { +if (!findChunk(codeMeta, { file: '.github/workflows/ci.yml', nameIncludes: 'build' }, codeFileById)) { failures.push('Missing GitHub Actions chunk for build job.'); } -if (!findChunk(codeMeta, { file: 'src/unknown.html', kind: 'Blob' })) { - failures.push('Missing fallback blob chunk for unknown.html.'); +if (!findChunk(codeMeta, { file: 'src/unknown.html', kind: 'ElementDeclaration', nameIncludes: 'html' }, codeFileById)) { + failures.push('Missing HTML element chunk for unknown.html.'); +} +if (!findChunk(codeMeta, { file: 'src/unknown.html', kind: 'ConfigSection', nameIncludes: 'settings' }, codeFileById)) { + failures.push('Missing embedded JSON chunk for unknown.html.'); +} +if (!findChunk(codeMeta, { file: 'src/unknown.html', kind: 'ConfigSection', nameIncludes: 'build' }, codeFileById)) { + failures.push('Missing embedded TOML chunk for unknown.html.'); +} +if (!findChunk(codeMeta, { file: 'src/unknown.html', kind: 'ConfigSection', nameIncludes: 'server' }, codeFileById)) { + failures.push('Missing embedded INI chunk for unknown.html.'); +} +if 
(!findChunk(codeMeta, { file: 'src/unknown.html', kind: 'Section', nameIncludes: 'Doc Block' }, codeFileById)) { + failures.push('Missing embedded Markdown chunk for unknown.html.'); +} +if (!findChunk(codeMeta, { file: 'src/styles.css', kind: 'StyleRule', nameIncludes: '.page-header' }, codeFileById)) { + failures.push('Missing CSS chunk for styles.css.'); } -if (!findChunk(proseMeta, { file: 'docs/guide.rst', nameIncludes: 'Guide' })) { +if (!findChunk(proseMeta, { file: 'docs/guide.rst', nameIncludes: 'Guide' }, proseFileById)) { failures.push('Missing RST chunk for Guide.'); } -if (!findChunk(proseMeta, { file: 'docs/manual.adoc', nameIncludes: 'Manual' })) { +if (!findChunk(proseMeta, { file: 'docs/manual.adoc', nameIncludes: 'Manual' }, proseFileById)) { failures.push('Missing AsciiDoc chunk for Manual.'); } diff --git a/tests/git-blame-range.js b/tests/git-blame-range.js new file mode 100644 index 000000000..b7bedcd70 --- /dev/null +++ b/tests/git-blame-range.js @@ -0,0 +1,109 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'git-blame-range'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +const gitCheck = spawnSync('git', ['--version'], { encoding: 'utf8' }); +if (gitCheck.status !== 0) { + console.log('[skip] git not available'); + process.exit(0); +} + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const runGit = (args, label) => { + const result = spawnSync('git', args, { cwd: repoRoot, encoding: 'utf8' }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 1); + } +}; + +runGit(['init'], 'git init'); +runGit(['config', 'user.email', 'alpha@example.com'], 'git config email alpha'); +runGit(['config', 'user.name', 'Alpha Author'], 'git config name alpha'); + +const sourcePath = path.join(repoRoot, 'sample.js'); +await fsPromises.writeFile( + sourcePath, + ['function alpha() {', ' return 1;', '}'].join('\n') + '\n' +); +runGit(['add', '.'], 'git add alpha'); +runGit(['commit', '-m', 'alpha'], 'git commit alpha'); + +runGit(['config', 'user.email', 'beta@example.com'], 'git config email beta'); +runGit(['config', 'user.name', 'Beta Author'], 'git config name beta'); +await fsPromises.appendFile( + sourcePath, + ['','function beta() {', ' return 2;', '}'].join('\n') + '\n' +); +runGit(['add', '.'], 'git add beta'); +runGit(['commit', '-m', 'beta'], 'git commit beta'); + +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); +if (buildResult.status !== 0) { + console.error('git blame range test failed: build_index failed'); + process.exit(buildResult.status ?? 
1); +} + +const userConfig = loadUserConfig(repoRoot); +const codeDir = getIndexDir(repoRoot, 'code', userConfig); +const meta = JSON.parse(fs.readFileSync(path.join(codeDir, 'chunk_meta.json'), 'utf8')); + +const findChunk = (name) => meta.find((chunk) => chunk.name === name || String(chunk.name || '').includes(name)); +const alphaChunk = findChunk('alpha'); +const betaChunk = findChunk('beta'); +if (!alphaChunk || !betaChunk) { + console.error('Expected alpha and beta chunks in chunk_meta.json'); + process.exit(1); +} +const alphaAuthors = new Set(alphaChunk.chunk_authors || []); +const betaAuthors = new Set(betaChunk.chunk_authors || []); +if (alphaChunk.startLine !== 1 || alphaChunk.endLine !== 3) { + console.error(`Expected alpha chunk line range 1-3, got ${alphaChunk.startLine}-${alphaChunk.endLine}`); + process.exit(1); +} +if (!Number.isFinite(betaChunk.startLine) || betaChunk.startLine < 4) { + console.error(`Expected beta chunk start line >= 4, got ${betaChunk.startLine}`); + process.exit(1); +} +if (!alphaAuthors.has('Alpha Author')) { + console.error(`Expected Alpha Author in alpha chunk authors, got ${Array.from(alphaAuthors).join(', ')}`); + process.exit(1); +} +if (!betaAuthors.has('Beta Author')) { + console.error(`Expected Beta Author in beta chunk authors, got ${Array.from(betaAuthors).join(', ')}`); + process.exit(1); +} +if (alphaAuthors.has('Beta Author')) { + console.error('Unexpected Beta Author in alpha chunk authors (range likely wrong).'); + process.exit(1); +} +if (betaAuthors.has('Alpha Author')) { + console.error('Unexpected Alpha Author in beta chunk authors (range likely wrong).'); + process.exit(1); +} + +console.log('Git blame range test passed'); diff --git a/tests/git-meta.js b/tests/git-meta.js index ae34f1d7b..5c914b57b 100644 --- a/tests/git-meta.js +++ b/tests/git-meta.js @@ -1,7 +1,7 @@ #!/usr/bin/env node import fs from 'node:fs'; import path from 'node:path'; -import { getGitMeta } from '../src/indexer/git.js'; +import { getGitMeta } from '../src/index/git.js'; const root = process.cwd(); const target = path.join(root, 'README.md'); @@ -11,8 +11,8 @@ if (!fs.existsSync(target)) { process.exit(1); } -const blameEnabled = await getGitMeta(target, 0, 0, { blame: true }); -const blameDisabled = await getGitMeta(target, 0, 0, { blame: false }); +const blameEnabled = await getGitMeta(target, 1, 1, { blame: true, baseDir: root }); +const blameDisabled = await getGitMeta(target, 1, 1, { blame: false, baseDir: root }); if (blameDisabled.chunk_authors !== undefined) { console.error('Expected git blame metadata to be disabled, but chunk_authors is present.'); diff --git a/tests/graph-chunk-id.js b/tests/graph-chunk-id.js new file mode 100644 index 000000000..243005322 --- /dev/null +++ b/tests/graph-chunk-id.js @@ -0,0 +1,23 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { buildRelationGraphs } from '../src/index/build/graphs.js'; + +const stableChunkId = 'chunk_graph_1'; +const chunks = [ + { + file: 'src/graph.js', + name: 'buildWidget', + kind: 'Function', + metaV2: { chunkId: stableChunkId }, + codeRelations: { + callLinks: [{ file: 'src/other.js', target: 'helper', kind: 'Function' }] + } + } +]; + +const graphs = buildRelationGraphs({ chunks, fileRelations: new Map() }); +const node = graphs.callGraph.nodes.find((entry) => entry.id === 'src/graph.js::buildWidget'); +assert.ok(node, 'expected call graph node'); +assert.equal(node.chunkId, stableChunkId, 'expected stable chunkId in graph output'); + +console.log('graph chunk id 
test passed'); diff --git a/tests/gtags-ingest.js b/tests/gtags-ingest.js new file mode 100644 index 000000000..5c0cfd329 --- /dev/null +++ b/tests/gtags-ingest.js @@ -0,0 +1,45 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'gtags-ingest'); +const repoRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const inputPath = path.join(root, 'tests', 'fixtures', 'gtags', 'gtags.txt'); +const outPath = path.join(tempRoot, 'gtags.jsonl'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); + +const result = spawnSync( + process.execPath, + [path.join(root, 'tools', 'gtags-ingest.js'), '--repo', repoRoot, '--input', inputPath, '--out', outPath, '--json'], + { encoding: 'utf8' } +); +if (result.status !== 0) { + console.error(result.stderr || result.stdout || 'gtags-ingest failed'); + process.exit(result.status ?? 1); +} + +if (!fs.existsSync(outPath)) { + console.error('gtags output not found'); + process.exit(1); +} + +const lines = fs.readFileSync(outPath, 'utf8').trim().split(/\r?\n/).filter(Boolean); +assert.ok(lines.length >= 2, 'expected gtags output lines'); + +const first = JSON.parse(lines[0]); +assert.equal(first.file, 'src/widget.js'); +assert.equal(first.name, 'Widget'); +assert.equal(first.startLine, 3); +assert.equal(first.source, 'gtags'); + +const metaPath = `${outPath}.meta.json`; +const meta = JSON.parse(fs.readFileSync(metaPath, 'utf8')); +assert.equal(meta.stats.entries, lines.length); + +console.log('gtags ingest test passed'); diff --git a/tests/hnsw-ann.js b/tests/hnsw-ann.js new file mode 100644 index 000000000..6e7d89ce5 --- /dev/null +++ b/tests/hnsw-ann.js @@ -0,0 +1,95 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const tempRoot = path.join(root, 'tests', '.cache', 'hnsw-ann'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); +await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true }); + +const config = { + cache: { root: cacheRoot }, + indexing: { + embeddings: { + hnsw: { + enabled: true + } + } + } +}; + +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify(config, null, 2) + '\n' +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +function run(args, label) { + const result = spawnSync(process.execPath, args, { + cwd: repoRoot, + env, + stdio: 'inherit' + }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 
1); + } +} + +run([path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], 'build index'); +run([path.join(root, 'tools', 'build-embeddings.js'), '--stub-embeddings', '--mode', 'code', '--repo', repoRoot], 'build embeddings (code)'); +run([path.join(root, 'tools', 'build-embeddings.js'), '--stub-embeddings', '--mode', 'prose', '--repo', repoRoot], 'build embeddings (prose)'); + +const userConfig = loadUserConfig(repoRoot); +const codeDir = getIndexDir(repoRoot, 'code', userConfig); +const proseDir = getIndexDir(repoRoot, 'prose', userConfig); +const codeIndex = path.join(codeDir, 'dense_vectors_hnsw.bin'); +const codeMeta = path.join(codeDir, 'dense_vectors_hnsw.meta.json'); +const proseIndex = path.join(proseDir, 'dense_vectors_hnsw.bin'); +const proseMeta = path.join(proseDir, 'dense_vectors_hnsw.meta.json'); + +if (!fs.existsSync(codeIndex) || !fs.existsSync(codeMeta)) { + console.error('HNSW index missing for code mode.'); + process.exit(1); +} +if (!fs.existsSync(proseIndex) || !fs.existsSync(proseMeta)) { + console.error('HNSW index missing for prose mode.'); + process.exit(1); +} + +const searchResult = spawnSync( + process.execPath, + [path.join(root, 'search.js'), 'index', '--backend', 'memory', '--json', '--ann', '--repo', repoRoot], + { cwd: repoRoot, env, encoding: 'utf8' } +); +if (searchResult.status !== 0) { + console.error('search.js failed for HNSW ANN test.'); + if (searchResult.stderr) console.error(searchResult.stderr.trim()); + process.exit(searchResult.status ?? 1); +} + +const payload = JSON.parse(searchResult.stdout || '{}'); +const stats = payload.stats || {}; +if (stats.annBackend !== 'hnsw') { + console.error(`Expected annBackend=hnsw, got ${stats.annBackend}`); + process.exit(1); +} +if (!stats.annHnsw?.available?.code || !stats.annHnsw?.available?.prose) { + console.error('Expected HNSW availability for code and prose.'); + process.exit(1); +} + +console.log('HNSW ANN test passed'); diff --git a/tests/hnsw-atomic.js b/tests/hnsw-atomic.js new file mode 100644 index 000000000..a5251f881 --- /dev/null +++ b/tests/hnsw-atomic.js @@ -0,0 +1,75 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; +import { resolveHnswPaths } from '../src/shared/hnsw.js'; +import { loadChunkMeta, readJsonFile } from '../src/shared/artifact-io.js'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const tempRoot = path.join(root, 'tests', '.cache', 'hnsw-atomic'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); +await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + +const buildIndex = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); +if (buildIndex.status !== 0) { + console.error('hnsw atomic test failed: build_index failed'); + process.exit(buildIndex.status ?? 
1); +} + +const userConfig = loadUserConfig(repoRoot); +const codeIndexDir = getIndexDir(repoRoot, 'code', userConfig); +const { indexPath: hnswIndexPath, metaPath: hnswMetaPath } = resolveHnswPaths(codeIndexDir); + +await fsPromises.writeFile(hnswIndexPath, 'stub-index'); +await fsPromises.writeFile(hnswMetaPath, JSON.stringify({ version: 1, dims: 1, count: 0 })); + +const buildEmbeddings = spawnSync( + process.execPath, + [path.join(root, 'tools', 'build-embeddings.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); +if (buildEmbeddings.status !== 0) { + console.error('hnsw atomic test failed: build-embeddings failed'); + process.exit(buildEmbeddings.status ?? 1); +} + +if (!fs.existsSync(`${hnswIndexPath}.bak`)) { + console.error('hnsw atomic test failed: expected .bak for HNSW index after replace'); + process.exit(1); +} + +const chunkMeta = loadChunkMeta(codeIndexDir); +const meta = readJsonFile(hnswMetaPath); +if (!Number.isFinite(meta?.count) || !Number.isFinite(meta?.expectedCount)) { + console.error('hnsw atomic test failed: missing count fields in HNSW meta'); + process.exit(1); +} +if (meta.count !== meta.expectedCount) { + console.error(`hnsw atomic test failed: count mismatch (${meta.count} vs ${meta.expectedCount})`); + process.exit(1); +} +if (meta.count !== chunkMeta.length) { + console.error(`hnsw atomic test failed: expected ${chunkMeta.length} vectors, got ${meta.count}`); + process.exit(1); +} + +console.log('hnsw atomic tests passed'); diff --git a/tests/hnsw-fallback-and-candidates.js b/tests/hnsw-fallback-and-candidates.js new file mode 100644 index 000000000..f0ea5b7a5 --- /dev/null +++ b/tests/hnsw-fallback-and-candidates.js @@ -0,0 +1,71 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; + +import { loadHnswIndex, rankHnswIndex } from '../src/shared/hnsw.js'; + +{ + // loadHnswIndex should fall back to .bak when the primary exists but is unreadable. + const tmp = await fsPromises.mkdtemp(path.join(os.tmpdir(), 'poc-hnsw-fallback-')); + const primary = path.join(tmp, 'dense_vectors_hnsw.bin'); + const bak = `${primary}.bak`; + await fsPromises.writeFile(primary, 'corrupt'); + await fsPromises.writeFile(bak, 'ok'); + + const readAttempts = []; + class FakeHNSW { + constructor(space, dims) { + this.space = space; + this.dims = dims; + this.ef = 0; + } + readIndexSync(p) { + readAttempts.push(p); + if (p === primary) { + throw new Error('corrupt index'); + } + return true; + } + setEf(ef) { + this.ef = ef; + } + } + + const index = loadHnswIndex({ + indexPath: primary, + dims: 2, + config: { enabled: true, efSearch: 17, space: 'cosine' }, + lib: { HierarchicalNSW: FakeHNSW } + }); + + assert.ok(index, 'expected fallback index to load'); + assert.deepEqual(readAttempts, [primary, bak], 'expected to try primary then .bak'); + assert.equal(index.ef, 17, 'expected efSearch to be applied on loaded index'); + assert.equal(fs.existsSync(bak), true, 'expected .bak to be preserved when used as fallback'); +} + +{ + // rankHnswIndex should treat an empty candidateSet as "no candidates". 
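+  // An empty Set is an explicit "restrict to these ids" filter with no
+  // members, so the expected behavior below is an immediate [] without ever
+  // calling searchKnn, not a fallback to an unfiltered whole-index search.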
+ const calls = []; + const fakeIndex = { + searchKnn: (vec, limit, filter) => { + calls.push({ vec, limit, filter }); + return { neighbors: [1], distances: [0.25] }; + } + }; + + const empty = rankHnswIndex({ index: fakeIndex, space: 'cosine' }, new Float32Array([1, 0]), 5, new Set()); + assert.deepEqual(empty, [], 'expected empty candidate set to yield no results'); + assert.equal(calls.length, 0, 'expected searchKnn to be skipped for empty candidate set'); + + const nonEmpty = rankHnswIndex({ index: fakeIndex, space: 'cosine' }, new Float32Array([1, 0]), 5, new Set([1])); + assert.equal(calls.length, 1, 'expected searchKnn to be invoked'); + assert.equal(Array.isArray(calls[0].vec), true, 'expected query embedding to be coerced to an Array'); + assert.equal(typeof calls[0].filter, 'function', 'expected candidate filter to be passed to searchKnn'); + assert.equal(nonEmpty.length, 1, 'expected a single neighbor'); +} + +console.log('hnsw fallback + candidate-set semantics test passed'); diff --git a/tests/ignore-overrides.js b/tests/ignore-overrides.js new file mode 100644 index 000000000..a7cc54da6 --- /dev/null +++ b/tests/ignore-overrides.js @@ -0,0 +1,40 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { buildIgnoreMatcher } from '../src/index/build/ignore.js'; +import { discoverFiles } from '../src/index/build/discover.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'ignore-overrides'); + +await fs.rm(tempRoot, { recursive: true, force: true }); +await fs.mkdir(path.join(tempRoot, 'dist'), { recursive: true }); +await fs.writeFile(path.join(tempRoot, 'dist', 'allow.js'), 'console.log("ok")'); +await fs.writeFile(path.join(tempRoot, 'dist', 'deny.js'), 'console.log("no")'); + +const { ignoreMatcher } = await buildIgnoreMatcher({ + root: tempRoot, + userConfig: { + extraIgnore: ['!dist/allow.js'] + } +}); + +const entries = await discoverFiles({ + root: tempRoot, + mode: 'code', + ignoreMatcher, + skippedFiles: [], + maxFileBytes: null +}); + +const rels = entries.map((entry) => entry.rel).sort(); +if (!rels.includes('dist/allow.js')) { + console.error('ignore override test failed: allow.js not discovered'); + process.exit(1); +} +if (rels.includes('dist/deny.js')) { + console.error('ignore override test failed: deny.js should be ignored'); + process.exit(1); +} + +console.log('ignore override test passed'); diff --git a/tests/import-links.js b/tests/import-links.js new file mode 100644 index 000000000..8f01277c6 --- /dev/null +++ b/tests/import-links.js @@ -0,0 +1,74 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'import-links'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.writeFile(path.join(repoRoot, 'src', 'a.js'), "import x from 'lib';\n"); +await fsPromises.writeFile(path.join(repoRoot, 'src', 'b.js'), "const x = require('lib');\n"); +await fsPromises.writeFile(path.join(repoRoot, 'src', 'c.js'), "import y from 'other';\n"); + 
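+// a.js (ESM) and b.js (CJS) both reference the shared 'lib' specifier, so each
+// should appear in the other's importLinks; c.js imports 'other' and must stay
+// unlinked from the pair.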
+process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); +if (buildResult.status !== 0) { + console.error('import-links test failed: build_index failed'); + process.exit(buildResult.status ?? 1); +} + +const userConfig = loadUserConfig(repoRoot); +const codeDir = getIndexDir(repoRoot, 'code', userConfig); +const relationsPath = path.join(codeDir, 'file_relations.json'); +if (!fs.existsSync(relationsPath)) { + console.error('import-links test failed: file_relations.json missing'); + process.exit(1); +} + +const raw = JSON.parse(fs.readFileSync(relationsPath, 'utf8')); +const map = new Map(raw.map((entry) => [entry.file, entry.relations])); +const relA = map.get('src/a.js'); +const relB = map.get('src/b.js'); + +if (!relA || !Array.isArray(relA.importLinks)) { + console.error('import-links test failed: missing importLinks for a.js'); + process.exit(1); +} +if (!relB || !Array.isArray(relB.importLinks)) { + console.error('import-links test failed: missing importLinks for b.js'); + process.exit(1); +} + +const expected = new Set(['src/a.js', 'src/b.js']); +for (const file of expected) { + if (!relA.importLinks.includes(file)) { + console.error(`import-links test failed: a.js missing link to ${file}`); + process.exit(1); + } +} +if (relA.importLinks.includes('src/c.js')) { + console.error('import-links test failed: a.js should not link to c.js'); + process.exit(1); +} + +console.log('Import links test passed'); diff --git a/tests/import-priority.js b/tests/import-priority.js new file mode 100644 index 000000000..3b2c1a25d --- /dev/null +++ b/tests/import-priority.js @@ -0,0 +1,25 @@ +#!/usr/bin/env node +import { sortImportScanItems } from '../src/index/build/imports.js'; + +const items = [ + { relKey: 'a', stat: { size: 100 }, index: 0 }, + { relKey: 'b', stat: { size: 1000 }, index: 1 }, + { relKey: 'c', stat: { size: 2000 }, index: 2 }, + { relKey: 'd', stat: { size: 150 }, index: 3 } +]; + +const counts = new Map([ + ['a', 10], + ['b', 5], + ['d', 10] +]); + +sortImportScanItems(items, counts); +const order = items.map((item) => item.relKey).join(','); + +if (order !== 'd,a,b,c') { + console.error(`import priority test failed: got ${order}`); + process.exit(1); +} + +console.log('import priority test passed'); diff --git a/tests/incremental-cache-signature.js b/tests/incremental-cache-signature.js new file mode 100644 index 000000000..b4b1c3674 --- /dev/null +++ b/tests/incremental-cache-signature.js @@ -0,0 +1,83 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'incremental-cache-signature'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const configPath = path.join(repoRoot, '.pairofcleats.json'); +const writeConfig = async (controlFlow) => { + await 
fsPromises.writeFile( + configPath, + JSON.stringify({ + indexing: { + controlFlow, + fileListSampleSize: 10, + treeSitter: { enabled: false } + } + }, null, 2) + ); +}; + +await writeConfig(false); +const filePath = path.join(repoRoot, 'src.js'); +await fsPromises.writeFile(filePath, 'function alpha() { return 1; }\n'); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const runBuild = (label) => { + const result = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--incremental', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } + ); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 1); + } +}; + +runBuild('initial build'); +runBuild('cache build'); + +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +const userConfig = loadUserConfig(repoRoot); +const codeDir = getIndexDir(repoRoot, 'code', userConfig); +const fileListsPath = path.join(codeDir, '.filelists.json'); +if (!fs.existsSync(fileListsPath)) { + console.error('Missing .filelists.json'); + process.exit(1); +} +const fileLists = JSON.parse(await fsPromises.readFile(fileListsPath, 'utf8')); +const cachedEntry = fileLists?.scanned?.sample?.find((entry) => entry?.file?.endsWith('src.js')); +if (!cachedEntry || cachedEntry.cached !== true) { + console.error('Expected cached entry after incremental rebuild'); + process.exit(1); +} + +await writeConfig(true); +runBuild('config signature rebuild'); + +const userConfigAfter = loadUserConfig(repoRoot); +const codeDirAfter = getIndexDir(repoRoot, 'code', userConfigAfter); +const fileListsAfter = JSON.parse(await fsPromises.readFile(path.join(codeDirAfter, '.filelists.json'), 'utf8')); +const rebuildEntry = fileListsAfter?.scanned?.sample?.find((entry) => entry?.file?.endsWith('src.js')); +if (!rebuildEntry || rebuildEntry.cached === true) { + console.error('Expected cache invalidation after config signature change'); + process.exit(1); +} + +console.log('incremental cache signature test passed'); diff --git a/tests/incremental-manifest.js b/tests/incremental-manifest.js new file mode 100644 index 000000000..c1cb49ef3 --- /dev/null +++ b/tests/incremental-manifest.js @@ -0,0 +1,74 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getRepoCacheRoot, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const repoRoot = path.join(root, 'tests', '.cache', 'incremental-manifest'); +const cacheRoot = path.join(repoRoot, '.cache'); +const buildIndexPath = path.join(root, 'build_index.js'); + +await fsPromises.rm(repoRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); + +const filePath = path.join(repoRoot, 'sample.js'); +await fsPromises.writeFile(filePath, 'export function hello() { return 1; }\n'); +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify({ sqlite: { use: false } }, null, 2) +); + +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const run = (args, label) => { + const result = spawnSync(process.execPath, args, { cwd: repoRoot, env, encoding: 'utf8' }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + if (result.stderr) 
console.error(result.stderr.trim()); + process.exit(result.status ?? 1); + } +}; + +run([buildIndexPath, '--incremental', '--stub-embeddings', '--mode', 'code', '--repo', repoRoot], 'initial build'); + +const userConfig = loadUserConfig(repoRoot); +const repoCacheRoot = getRepoCacheRoot(repoRoot, userConfig); +const manifestPath = path.join(repoCacheRoot, 'incremental', 'code', 'manifest.json'); +if (!fs.existsSync(manifestPath)) { + console.error('Missing incremental manifest after initial build.'); + process.exit(1); +} + +const manifestBefore = JSON.parse(fs.readFileSync(manifestPath, 'utf8')); +const entryBefore = manifestBefore.files?.['sample.js']; +if (!entryBefore) { + console.error('Missing manifest entry for sample.js.'); + process.exit(1); +} + +const newTime = new Date(Date.now() + 5000); +fs.utimesSync(filePath, newTime, newTime); + +run([buildIndexPath, '--incremental', '--stub-embeddings', '--mode', 'code', '--repo', repoRoot], 'second build'); + +const manifestAfter = JSON.parse(fs.readFileSync(manifestPath, 'utf8')); +const entryAfter = manifestAfter.files?.['sample.js']; +if (!entryAfter) { + console.error('Missing manifest entry after rebuild.'); + process.exit(1); +} + +const statAfter = fs.statSync(filePath); +if (entryAfter.mtimeMs !== statAfter.mtimeMs) { + console.error(`Manifest mtimeMs not updated (${entryAfter.mtimeMs} vs ${statAfter.mtimeMs}).`); + process.exit(1); +} + +console.log('Incremental manifest refresh test passed'); diff --git a/tests/incremental-reuse.js b/tests/incremental-reuse.js new file mode 100644 index 000000000..bda0872bb --- /dev/null +++ b/tests/incremental-reuse.js @@ -0,0 +1,73 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { shouldReuseIncrementalIndex } from '../src/index/build/incremental.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'incremental-reuse'); +const outDir = path.join(tempRoot, 'index'); + +await fs.rm(tempRoot, { recursive: true, force: true }); +await fs.mkdir(path.join(outDir, 'pieces'), { recursive: true }); + +const indexState = { stage: 'stage2' }; +const pieceManifest = { version: 2, pieces: [{ name: 'chunk_meta', path: 'chunk_meta.json' }] }; +await fs.writeFile(path.join(outDir, 'index_state.json'), JSON.stringify(indexState)); +await fs.writeFile(path.join(outDir, 'pieces', 'manifest.json'), JSON.stringify(pieceManifest)); + +const entries = [ + { rel: 'src/a.js', stat: { size: 10, mtimeMs: 123 } }, + { rel: 'src/b.js', stat: { size: 20, mtimeMs: 456 } } +]; + +const manifest = { + files: { + 'src/a.js': { size: 10, mtimeMs: 123 }, + 'src/b.js': { size: 20, mtimeMs: 456 } + } +}; + +const reuse = await shouldReuseIncrementalIndex({ + outDir, + entries, + manifest, + stage: 'stage1' +}); + +if (!reuse) { + console.error('incremental reuse test failed: expected reuse'); + process.exit(1); +} + +const extraManifest = { + files: { + ...manifest.files, + 'src/c.js': { size: 30, mtimeMs: 789 } + } +}; + +const noReuseDeleted = await shouldReuseIncrementalIndex({ + outDir, + entries, + manifest: extraManifest, + stage: 'stage1' +}); + +if (noReuseDeleted) { + console.error('incremental reuse test failed: expected deletion mismatch'); + process.exit(1); +} + +const noReuse = await shouldReuseIncrementalIndex({ + outDir, + entries: [{ rel: 'src/a.js', stat: { size: 11, mtimeMs: 123 } }], + manifest, + stage: 'stage2' +}); + +if (noReuse) { + console.error('incremental reuse test failed: expected mismatch'); + process.exit(1); +} 
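+// Covered above: a clean reuse, a manifest entry with no surviving file (a
+// deletion), and a changed file size; the latter two must both force a rebuild.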
+ +console.log('incremental reuse test passed'); diff --git a/tests/incremental-tokenization-cache.js b/tests/incremental-tokenization-cache.js new file mode 100644 index 000000000..d67bcea3e --- /dev/null +++ b/tests/incremental-tokenization-cache.js @@ -0,0 +1,91 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'incremental-token-cache'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const configPath = path.join(repoRoot, '.pairofcleats.json'); +const writeConfig = async (enableChargrams) => { + await fsPromises.writeFile( + configPath, + JSON.stringify({ + indexing: { + postings: { enableChargrams }, + fileListSampleSize: 10, + treeSitter: { enabled: false } + } + }, null, 2) + ); +}; +await writeConfig(false); + +const filePath = path.join(repoRoot, 'src.js'); +await fsPromises.writeFile(filePath, 'function alpha() { return 1; }\n'); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const runBuild = (label) => { + const result = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--incremental', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } + ); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 1); + } +}; + +runBuild('initial build'); +runBuild('cache build'); + +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +const userConfig = loadUserConfig(repoRoot); +const codeDir = getIndexDir(repoRoot, 'code', userConfig); +const fileListsPath = path.join(codeDir, '.filelists.json'); +if (!fs.existsSync(fileListsPath)) { + console.error('Missing .filelists.json'); + process.exit(1); +} +const fileLists = JSON.parse(await fsPromises.readFile(fileListsPath, 'utf8')); +const scannedSample = fileLists?.scanned?.sample; +if (!Array.isArray(scannedSample)) { + console.error('Scanned sample payload is not an array'); + process.exit(1); +} +const cachedEntry = scannedSample.find((entry) => entry?.file && entry.file.endsWith('src.js')); +if (!cachedEntry || cachedEntry.cached !== true) { + console.error('Expected cached entry after incremental rebuild'); + process.exit(1); +} + +await writeConfig(true); +runBuild('config change rebuild'); + +const userConfigAfter = loadUserConfig(repoRoot); +const codeDirAfter = getIndexDir(repoRoot, 'code', userConfigAfter); +const fileListsAfter = JSON.parse(await fsPromises.readFile(path.join(codeDirAfter, '.filelists.json'), 'utf8')); +const scannedAfter = fileListsAfter?.scanned?.sample; +const rebuildEntry = Array.isArray(scannedAfter) + ? 
scannedAfter.find((entry) => entry?.file && entry.file.endsWith('src.js')) + : null; +if (!rebuildEntry || rebuildEntry.cached === true) { + console.error('Expected cache invalidation after tokenization config change'); + process.exit(1); +} + +console.log('incremental tokenization cache test passed'); diff --git a/tests/index-cache.js b/tests/index-cache.js new file mode 100644 index 000000000..bd79d9194 --- /dev/null +++ b/tests/index-cache.js @@ -0,0 +1,34 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fs from 'node:fs/promises'; +import path from 'node:path'; +import os from 'node:os'; +import { loadIndexWithCache } from '../src/retrieval/index-cache.js'; + +const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'pairofcleats-index-cache-')); +const indexDir = path.join(tempRoot, 'index'); +await fs.mkdir(indexDir, { recursive: true }); + +const writeMeta = async (value) => { + await fs.writeFile(path.join(indexDir, 'chunk_meta.json'), JSON.stringify(value)); +}; + +const cache = new Map(); +let loads = 0; +const loader = () => { + loads += 1; + return { loaded: loads }; +}; + +await writeMeta([{ id: 1 }]); +const first = loadIndexWithCache(cache, indexDir, { modelIdDefault: 'm', fileChargramN: 3 }, loader); +const second = loadIndexWithCache(cache, indexDir, { modelIdDefault: 'm', fileChargramN: 3 }, loader); +assert.equal(loads, 1, 'cache should prevent reloads'); +assert.equal(first.loaded, second.loaded, 'cached result should match'); + +await writeMeta([{ id: 2 }]); +const third = loadIndexWithCache(cache, indexDir, { modelIdDefault: 'm', fileChargramN: 3 }, loader); +assert.equal(loads, 2, 'cache should reload after signature change'); +assert.notEqual(third.loaded, first.loaded, 'reloaded result should differ'); + +console.log('index cache tests passed'); diff --git a/tests/index-lifecycle-contract.js b/tests/index-lifecycle-contract.js new file mode 100644 index 000000000..114db1caf --- /dev/null +++ b/tests/index-lifecycle-contract.js @@ -0,0 +1,75 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'index-lifecycle'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.writeFile( + path.join(repoRoot, 'alpha.js'), + 'export const alpha = () => "alpha";\n' +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--mode', 'code', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); + +if (buildResult.status !== 0) { + console.error('Failed: index build for lifecycle contract'); + process.exit(buildResult.status ?? 1); +} + +const validateResult = spawnSync( + process.execPath, + [path.join(root, 'tools', 'index-validate.js'), '--json', '--mode', 'code', '--repo', repoRoot], + { cwd: repoRoot, env, encoding: 'utf8' } +); + +if (validateResult.status !== 0) { + console.error('Failed: index validate for lifecycle contract'); + if (validateResult.stderr) console.error(validateResult.stderr.trim()); + process.exit(validateResult.status ?? 
1); +} + +let payload = null; +try { + payload = JSON.parse(validateResult.stdout || '{}'); +} catch { + console.error('Failed: index validate returned invalid JSON'); + process.exit(1); +} + +if (!payload || typeof payload !== 'object') { + console.error('Failed: index validate payload missing'); + process.exit(1); +} + +if (!payload.ok) { + console.error('Failed: index validate reported issues'); + if (Array.isArray(payload.issues)) { + payload.issues.forEach((issue) => console.error(`- ${issue}`)); + } + process.exit(1); +} + +if (!payload.modes || !payload.modes.code) { + console.error('Failed: index validate missing code mode'); + process.exit(1); +} + +console.log('index lifecycle contract tests passed'); diff --git a/tests/index-lock.js b/tests/index-lock.js new file mode 100644 index 000000000..00a7ee7e0 --- /dev/null +++ b/tests/index-lock.js @@ -0,0 +1,45 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { acquireIndexLock } from '../src/index/build/lock.js'; + +const root = process.cwd(); +const baseDir = path.join(root, 'tests', '.cache', 'index-lock'); +const repoCacheRoot = path.join(baseDir, 'repo'); +const lockDir = path.join(repoCacheRoot, 'locks'); +const lockPath = path.join(lockDir, 'index.lock'); +const staleMs = 24 * 60 * 60 * 1000; + +await fsPromises.rm(baseDir, { recursive: true, force: true }); +await fsPromises.mkdir(lockDir, { recursive: true }); + +await fsPromises.writeFile( + lockPath, + JSON.stringify({ pid: 999999, startedAt: new Date().toISOString() }) +); + +const lock = await acquireIndexLock({ repoCacheRoot, staleMs, log: () => {} }); +if (!lock) { + console.error('index-lock test failed: dead pid lock was not cleared.'); + process.exit(1); +} +await lock.release(); + +await fsPromises.writeFile( + lockPath, + JSON.stringify({ pid: process.pid, startedAt: new Date().toISOString() }) +); + +const lockActive = await acquireIndexLock({ repoCacheRoot, staleMs, log: () => {} }); +if (lockActive) { + await lockActive.release(); + console.error('index-lock test failed: active lock should not be acquired.'); + process.exit(1); +} + +if (fs.existsSync(lockPath)) { + await fsPromises.rm(lockPath, { force: true }); +} + +console.log('index-lock test passed'); diff --git a/tests/index-validate.js b/tests/index-validate.js new file mode 100644 index 000000000..476a11caf --- /dev/null +++ b/tests/index-validate.js @@ -0,0 +1,82 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const cacheRoot = path.join(root, 'tests', '.cache', 'index-validate'); +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +await fsPromises.rm(cacheRoot, { recursive: true, force: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const validatorPath = path.join(root, 'tools', 'index-validate.js'); +const buildPath = path.join(root, 'build_index.js'); + +const missingResult = spawnSync( + process.execPath, + [validatorPath, '--repo', fixtureRoot, '--json'], + { env, encoding: 'utf8' } +); +if (missingResult.status === 0) { + console.error('Expected index-validate to fail when indexes are missing.'); + process.exit(1); +} + +const buildResult = spawnSync( + 
process.execPath, + [buildPath, '--stub-embeddings', '--repo', fixtureRoot], + { env, encoding: 'utf8' } +); +if (buildResult.status !== 0) { + console.error('Failed to build fixture index for index-validate test.'); + if (buildResult.stderr) console.error(buildResult.stderr.trim()); + process.exit(buildResult.status ?? 1); +} +const previousCacheRoot = process.env.PAIROFCLEATS_CACHE_ROOT; +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +const userConfig = loadUserConfig(fixtureRoot); +const codeDir = getIndexDir(fixtureRoot, 'code', userConfig); +if (previousCacheRoot === undefined) { + delete process.env.PAIROFCLEATS_CACHE_ROOT; +} else { + process.env.PAIROFCLEATS_CACHE_ROOT = previousCacheRoot; +} +const piecesPath = path.join(codeDir, 'pieces', 'manifest.json'); +try { + await fsPromises.access(piecesPath); +} catch { + console.error('Expected pieces manifest to exist after build.'); + process.exit(1); +} + +const okResult = spawnSync( + process.execPath, + [validatorPath, '--repo', fixtureRoot, '--json'], + { env, encoding: 'utf8' } +); +if (okResult.status !== 0) { + console.error('Expected index-validate to pass after building index.'); + if (okResult.stderr) console.error(okResult.stderr.trim()); + process.exit(okResult.status ?? 1); +} + +let payload; +try { + payload = JSON.parse(okResult.stdout); +} catch { + console.error('index-validate did not return valid JSON.'); + process.exit(1); +} +if (!payload || payload.ok !== true) { + console.error('index-validate JSON payload missing ok=true.'); + process.exit(1); +} + +console.log('index-validate test passed'); diff --git a/tests/indexer-service.js b/tests/indexer-service.js new file mode 100644 index 000000000..149c44455 --- /dev/null +++ b/tests/indexer-service.js @@ -0,0 +1,49 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'indexer-service'); +const repoRoot = path.join(tempRoot, 'repo'); +const queueDir = path.join(tempRoot, 'queue'); +const configPath = path.join(tempRoot, 'service.json'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); + +const config = { + queueDir, + repos: [ + { id: 'repo', path: repoRoot, syncPolicy: 'none' } + ] +}; +await fsPromises.writeFile(configPath, JSON.stringify(config, null, 2)); + +const enqueue = spawnSync( + process.execPath, + [path.join(root, 'tools', 'indexer-service.js'), 'enqueue', '--config', configPath, '--repo', repoRoot, '--mode', 'code'], + { encoding: 'utf8' } +); +if (enqueue.status !== 0) { + console.error(enqueue.stderr || enqueue.stdout || 'indexer-service enqueue failed'); + process.exit(enqueue.status ?? 1); +} + +const status = spawnSync( + process.execPath, + [path.join(root, 'tools', 'indexer-service.js'), 'status', '--config', configPath], + { encoding: 'utf8' } +); +if (status.status !== 0) { + console.error(status.stderr || status.stdout || 'indexer-service status failed'); + process.exit(status.status ?? 
1); +} + +const payload = JSON.parse(status.stdout || '{}'); +assert.equal(payload.queue?.queued, 1); +assert.ok(fs.existsSync(path.join(queueDir, 'queue.json'))); + +console.log('indexer service test passed'); diff --git a/tests/indexer/incremental-plan.test.js b/tests/indexer/incremental-plan.test.js new file mode 100644 index 000000000..ba266cca7 --- /dev/null +++ b/tests/indexer/incremental-plan.test.js @@ -0,0 +1,54 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { shouldReuseIncrementalIndex } from '../../src/index/build/incremental.js'; + +const fail = (message) => { + console.error(message); + process.exit(1); +}; + +const root = process.cwd(); +const baseDir = path.join(root, 'tests', '.cache', 'indexer-plan'); +const outDir = path.join(baseDir, 'out'); +const piecesDir = path.join(outDir, 'pieces'); +const fixtureFile = path.join(baseDir, 'src', 'a.js'); + +const setup = async () => { + await fs.rm(baseDir, { recursive: true, force: true }); + await fs.mkdir(path.dirname(fixtureFile), { recursive: true }); + await fs.writeFile(fixtureFile, 'const a = 1;\n'); + await fs.mkdir(piecesDir, { recursive: true }); + await fs.writeFile(path.join(outDir, 'index_state.json'), JSON.stringify({ stage: 'stage2' })); + await fs.writeFile(path.join(piecesDir, 'manifest.json'), JSON.stringify({ pieces: [{ id: 'piece-1' }] })); +}; + +const run = async () => { + await setup(); + const stat = await fs.stat(fixtureFile); + const entries = [{ rel: 'src/a.js', stat }]; + const manifest = { files: { 'src/a.js': { size: stat.size, mtimeMs: stat.mtimeMs } } }; + + const reuse = await shouldReuseIncrementalIndex({ outDir, entries, manifest, stage: 'stage2' }); + if (!reuse) { + fail('shouldReuseIncrementalIndex should return true for matching manifest entries.'); + } + + const stageMismatch = await shouldReuseIncrementalIndex({ outDir, entries, manifest, stage: 'stage3' }); + if (stageMismatch) { + fail('shouldReuseIncrementalIndex should fail when stage is not satisfied.'); + } + + const manifestMismatch = { files: { 'src/a.js': { size: stat.size + 1, mtimeMs: stat.mtimeMs } } }; + const reuseMismatch = await shouldReuseIncrementalIndex({ outDir, entries, manifest: manifestMismatch, stage: 'stage2' }); + if (reuseMismatch) { + fail('shouldReuseIncrementalIndex should fail when file sizes differ.'); + } +}; + +try { + await run(); + console.log('indexer incremental plan tests passed'); +} finally { + await fs.rm(baseDir, { recursive: true, force: true }); +} diff --git a/tests/indexer/signatures.test.js b/tests/indexer/signatures.test.js new file mode 100644 index 000000000..3d5dfb359 --- /dev/null +++ b/tests/indexer/signatures.test.js @@ -0,0 +1,86 @@ +#!/usr/bin/env node +import { buildIncrementalSignature, buildTokenizationKey } from '../../src/index/build/indexer/signatures.js'; + +const fail = (message) => { + console.error(message); + process.exit(1); +}; + +const baseRuntime = { + commentsConfig: { + licensePattern: /MIT/, + generatedPattern: /@generated/, + linterPattern: /eslint/ + }, + dictConfig: { splitCase: true }, + postingsConfig: { enablePhraseNgrams: true }, + dictSignature: 'sig-a', + segmentsConfig: { enabled: true } +}; + +const tokenKeyA = buildTokenizationKey(baseRuntime, 'code'); +const tokenKeyB = buildTokenizationKey({ ...baseRuntime, dictSignature: 'sig-b' }, 'code'); +if (tokenKeyA === tokenKeyB) { + fail('buildTokenizationKey should reflect dictSignature changes.'); +} + +const runtimeA = { + astDataflowEnabled: true, + 
controlFlowEnabled: false, + lintEnabled: true, + complexityEnabled: true, + riskAnalysisEnabled: false, + riskAnalysisCrossFileEnabled: false, + typeInferenceEnabled: true, + typeInferenceCrossFileEnabled: false, + gitBlameEnabled: true, + indexingConfig: { + riskRules: { foo: 'bar' }, + riskCaps: { max: 1 }, + importScan: 'post' + }, + languageOptions: { + javascript: { parser: 'babel', flow: 'auto' }, + typescript: { parser: 'auto', importsOnly: false }, + treeSitter: { + enabled: true, + languages: { js: true }, + configChunking: true, + maxBytes: 100, + maxLines: 200, + maxParseMs: 300, + byLanguage: {} + }, + yamlChunking: { mode: 'root' }, + kotlin: { flowMaxBytes: 1 } + }, + embeddingEnabled: true, + embeddingService: false, + embeddingMode: 'inline', + embeddingBatchSize: 32, + fileCaps: { default: { maxBytes: 1, maxLines: 2 }, byExt: {}, byLanguage: {} }, + fileScan: { sampleBytes: 64 }, + incrementalBundleFormat: 'json' +}; + +const sigA = buildIncrementalSignature(runtimeA, 'code', tokenKeyA); +const sigB = buildIncrementalSignature({ + ...runtimeA, + languageOptions: { + ...runtimeA.languageOptions, + typescript: { parser: 'typescript', importsOnly: false } + } +}, 'code', tokenKeyA); +if (sigA === sigB) { + fail('buildIncrementalSignature should reflect parser changes.'); +} + +const sigC = buildIncrementalSignature({ + ...runtimeA, + embeddingBatchSize: 64 +}, 'code', tokenKeyA); +if (sigA === sigC) { + fail('buildIncrementalSignature should reflect embedding batch changes.'); +} + +console.log('indexer signatures tests passed'); diff --git a/tests/inline-embeddings-validation.js b/tests/inline-embeddings-validation.js new file mode 100644 index 000000000..ad89828cb --- /dev/null +++ b/tests/inline-embeddings-validation.js @@ -0,0 +1,47 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; + +import { attachEmbeddings } from '../src/index/build/file-processor/embeddings.js'; + +{ + const chunks = [{}, {}]; + await assert.rejects( + () => attachEmbeddings({ + chunks, + codeTexts: ['a', 'b'], + docTexts: ['', 'doc'], + embeddingEnabled: true, + embeddingMode: 'both', + embeddingBatchSize: 16, + runEmbedding: async (fn) => await fn(), + getChunkEmbedding: async () => [1, 2], + getChunkEmbeddings: async () => [[1, 2], [1, 2, 3]] + }), + /dims mismatch/i, + 'expected inline embedding attachment to fail fast on dims mismatch' + ); +} + +{ + const chunks = [{}, {}]; + const res = await attachEmbeddings({ + chunks, + codeTexts: ['a', 'b'], + docTexts: ['', 'doc'], + embeddingEnabled: true, + embeddingMode: 'both', + embeddingBatchSize: 16, + runEmbedding: async (fn) => await fn(), + getChunkEmbedding: async () => [9, 9, 9], + getChunkEmbeddings: async (texts) => texts.map((_, i) => new Float32Array([i, i + 1, i + 2])) + }); + + assert.ok(res && Number.isFinite(res.embeddingMs), 'expected timing result'); + assert.equal(chunks.length, 2); + assert.equal(chunks[0].embed_code.length, 3); + assert.equal(chunks[0].embed_doc.length, 3, 'expected zero doc vector when doc text is missing'); + assert.equal(chunks[0].embedding.length, 3); + assert.equal(chunks[1].embed_doc.length, 3, 'expected doc embedding vector'); +} + +console.log('inline embeddings validation test passed'); diff --git a/tests/io-concurrency-cap.js b/tests/io-concurrency-cap.js new file mode 100644 index 000000000..ed8bb7410 --- /dev/null +++ b/tests/io-concurrency-cap.js @@ -0,0 +1,23 @@ +#!/usr/bin/env node +import { resolveThreadLimits } from '../src/shared/threads.js'; + +const threadLimits = 
resolveThreadLimits({ + configConcurrency: 8, + importConcurrencyConfig: 8, + ioConcurrencyCapConfig: 16 +}); + +if (threadLimits.ioConcurrency !== 16) { + throw new Error(`io-concurrency-cap test failed: expected ioConcurrency=16, got ${threadLimits.ioConcurrency}`); +} + +// Verify cap is not increasing concurrency +const uncapped = resolveThreadLimits({ + configConcurrency: 8, + importConcurrencyConfig: 8 +}); +if (uncapped.ioConcurrency < threadLimits.ioConcurrency) { + throw new Error(`io-concurrency-cap test failed: uncapped ioConcurrency=${uncapped.ioConcurrency} should be >= capped ioConcurrency=${threadLimits.ioConcurrency}`); +} + +console.log('io-concurrency-cap test passed'); diff --git a/tests/json-stream.js b/tests/json-stream.js new file mode 100644 index 000000000..b2128088d --- /dev/null +++ b/tests/json-stream.js @@ -0,0 +1,47 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { writeJsonArrayFile, writeJsonObjectFile } from '../src/shared/json-stream.js'; + +const root = process.cwd(); +const outDir = path.join(root, 'tests', '.cache', 'json-stream'); +await fs.rm(outDir, { recursive: true, force: true }); +await fs.mkdir(outDir, { recursive: true }); + +const arrayPath = path.join(outDir, 'array.json'); +const arrayInput = [ + { id: 1, name: 'alpha' }, + { id: 2, name: 'beta' } +]; +await writeJsonArrayFile(arrayPath, arrayInput); +const arrayParsed = JSON.parse(await fs.readFile(arrayPath, 'utf8')); +if (JSON.stringify(arrayParsed) !== JSON.stringify(arrayInput)) { + console.error('json-stream array test failed: parsed output mismatch.'); + process.exit(1); +} + +const objPath = path.join(outDir, 'object.json'); +const fields = { model: 'test', dims: 2, scale: 1 }; +const arrays = { + vectors: [ + [1, 2], + [3, 4] + ], + vocab: ['foo', 'bar'] +}; +await writeJsonObjectFile(objPath, { fields, arrays }); +const objParsed = JSON.parse(await fs.readFile(objPath, 'utf8')); +if (objParsed.model !== fields.model || objParsed.dims !== fields.dims || objParsed.scale !== fields.scale) { + console.error('json-stream object test failed: fields mismatch.'); + process.exit(1); +} +if (!Array.isArray(objParsed.vectors) || objParsed.vectors.length !== arrays.vectors.length) { + console.error('json-stream object test failed: vectors mismatch.'); + process.exit(1); +} +if (!Array.isArray(objParsed.vocab) || objParsed.vocab.length !== arrays.vocab.length) { + console.error('json-stream object test failed: vocab mismatch.'); + process.exit(1); +} + +console.log('json-stream test passed'); diff --git a/tests/jsonrpc-parser.js b/tests/jsonrpc-parser.js new file mode 100644 index 000000000..60811baad --- /dev/null +++ b/tests/jsonrpc-parser.js @@ -0,0 +1,31 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { createFramedJsonRpcParser } from '../src/shared/jsonrpc.js'; + +const frame = (payload) => { + const body = JSON.stringify(payload); + return Buffer.from(`Content-Length: ${Buffer.byteLength(body)}\r\n\r\n${body}`); +}; + +const messages = []; +const errors = []; +const parser = createFramedJsonRpcParser({ + onMessage: (msg) => messages.push(msg), + onError: (err) => errors.push(err), + maxBufferBytes: 256, + maxHeaderBytes: 128, + maxMessageBytes: 64 +}); + +parser.push(frame({ jsonrpc: '2.0', id: 1, result: 'ok' })); +assert.equal(messages.length, 1, 'expected one message before overflow'); +assert.equal(errors.length, 0, 'did not expect errors for valid payload'); + +parser.push(frame({ jsonrpc: '2.0', id: 2, 
result: 'x'.repeat(200) })); +assert.equal(errors.length, 1, 'expected overflow error'); +assert.ok(errors[0]?.message?.includes('exceeded'), 'error message should mention size limit'); + +parser.push(frame({ jsonrpc: '2.0', id: 3, result: 'ok' })); +assert.equal(messages.length, 1, 'parser should stop after overflow'); + +console.log('jsonrpc parser tests passed'); diff --git a/tests/kotlin-perf-guard.js b/tests/kotlin-perf-guard.js new file mode 100644 index 000000000..a129e3eb0 --- /dev/null +++ b/tests/kotlin-perf-guard.js @@ -0,0 +1,46 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { buildKotlinChunks, buildKotlinRelations, computeKotlinFlow, getKotlinFileStats } from '../src/lang/kotlin.js'; + +const text = 'class Widget { fun render(a: Int): Int { if (a > 0) { foo() } return a } }\n'; +const chunks = buildKotlinChunks(text, {}) || []; +const target = chunks.find((chunk) => chunk.kind === 'MethodDeclaration' || chunk.kind === 'FunctionDeclaration'); +if (!target) { + console.error('Missing Kotlin function chunk for perf guard test.'); + process.exit(1); +} + +const stats = getKotlinFileStats(text); +const fullOptions = { + stats, + kotlin: { + flowMaxBytes: 10 * 1024, + flowMaxLines: 100, + relationsMaxBytes: 10 * 1024, + relationsMaxLines: 100 + } +}; +const skipOptions = { + stats, + kotlin: { + flowMaxBytes: 1, + flowMaxLines: 1, + relationsMaxBytes: 1, + relationsMaxLines: 1 + } +}; + +const flowFull = computeKotlinFlow(text, target, { ...fullOptions, dataflow: true, controlFlow: true }); +assert.ok(flowFull && flowFull.controlFlow, 'Expected flow metadata for Kotlin chunk.'); + +const flowSkipped = computeKotlinFlow(text, target, { ...skipOptions, dataflow: true, controlFlow: true }); +assert.equal(flowSkipped, null, 'Expected flow metadata to be skipped for large Kotlin file.'); + +const relationsFull = buildKotlinRelations(text, {}, chunks, fullOptions); +assert.ok(relationsFull.calls.some((entry) => entry[1] && entry[1].includes('foo')), + 'Expected Kotlin calls to include foo().'); + +const relationsSkipped = buildKotlinRelations(text, {}, chunks, skipOptions); +assert.equal(relationsSkipped.calls.length, 0, 'Expected Kotlin relations to be skipped.'); + +console.log('kotlin perf guard test passed'); diff --git a/tests/lancedb-ann.js b/tests/lancedb-ann.js new file mode 100644 index 000000000..bb2332221 --- /dev/null +++ b/tests/lancedb-ann.js @@ -0,0 +1,94 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { tryImport } from '../src/shared/optional-deps.js'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const tempRoot = path.join(root, 'tests', '.cache', 'lancedb-ann'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +const lanceAvailable = (await tryImport('@lancedb/lancedb')).ok; +if (!lanceAvailable) { + console.warn('lancedb missing; skipping lancedb-ann test.'); + process.exit(0); +} + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); +await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true }); + +const config = { + cache: { root: cacheRoot } +}; + +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify(config, null, 2) + '\n' +); 
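+// The repo-local .pairofcleats.json written above pins this test's cache under the temporary cache root.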
+ +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const run = (args, label) => { + const result = spawnSync(process.execPath, args, { + cwd: repoRoot, + env, + stdio: 'inherit' + }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 1); + } +}; + +run([path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], 'build index'); + +const userConfig = loadUserConfig(repoRoot); +const codeDir = getIndexDir(repoRoot, 'code', userConfig); +const proseDir = getIndexDir(repoRoot, 'prose', userConfig); +const codeDb = path.join(codeDir, 'dense_vectors.lancedb'); +const proseDb = path.join(proseDir, 'dense_vectors.lancedb'); +const codeMeta = path.join(codeDir, 'dense_vectors.lancedb.meta.json'); +const proseMeta = path.join(proseDir, 'dense_vectors.lancedb.meta.json'); + +if (!fs.existsSync(codeDb) || !fs.existsSync(codeMeta)) { + console.error('LanceDB index missing for code mode.'); + process.exit(1); +} +if (!fs.existsSync(proseDb) || !fs.existsSync(proseMeta)) { + console.error('LanceDB index missing for prose mode.'); + process.exit(1); +} + +const searchResult = spawnSync( + process.execPath, + [path.join(root, 'search.js'), 'index', '--backend', 'memory', '--json', '--ann', '--repo', repoRoot], + { cwd: repoRoot, env, encoding: 'utf8' } +); +if (searchResult.status !== 0) { + console.error('search.js failed for LanceDB ANN test.'); + if (searchResult.stderr) console.error(searchResult.stderr.trim()); + process.exit(searchResult.status ?? 1); +} + +const payload = JSON.parse(searchResult.stdout || '{}'); +const stats = payload.stats || {}; +if (stats.annBackend !== 'lancedb') { + console.error(`Expected annBackend=lancedb, got ${stats.annBackend}`); + process.exit(1); +} +if (!stats.annLance?.available?.code || !stats.annLance?.available?.prose) { + console.error('Expected LanceDB availability for code and prose.'); + process.exit(1); +} + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +console.log('LanceDB ANN test passed'); diff --git a/tests/lang-filter.js b/tests/lang-filter.js new file mode 100644 index 000000000..35a9be944 --- /dev/null +++ b/tests/lang-filter.js @@ -0,0 +1,24 @@ +import assert from 'node:assert/strict'; +import { mergeExtFilters, normalizeLangFilter } from '../src/retrieval/filters.js'; + +const js = normalizeLangFilter('js'); +assert.ok(js && js.includes('.js'), 'expected js to include .js'); +assert.ok(js && js.includes('.jsx'), 'expected js to include .jsx'); + +const mixed = normalizeLangFilter('ts,python'); +assert.ok(mixed && mixed.includes('.ts'), 'expected mixed to include .ts'); +assert.ok(mixed && mixed.includes('.py'), 'expected mixed to include .py'); + +const extFilter = ['.ts', '.tsx']; +const langFilter = normalizeLangFilter('typescript'); +const merged = mergeExtFilters(extFilter, langFilter); +assert.ok(merged, 'expected merged to be non-null'); +assert.deepEqual(new Set(merged), new Set(extFilter)); + +const mergedEmpty = mergeExtFilters(['.ts'], normalizeLangFilter('python')); +assert.equal(mergedEmpty, null); + +const unknown = normalizeLangFilter('unknown'); +assert.equal(unknown, null); + +console.log('lang filter test passed'); diff --git a/tests/lang/js-chunking.test.js b/tests/lang/js-chunking.test.js new file mode 100644 index 000000000..69b191073 --- /dev/null +++ b/tests/lang/js-chunking.test.js @@ -0,0 +1,33 @@ +#!/usr/bin/env node +import { buildJsChunks } from '../../src/lang/javascript.js'; + 
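+// Sample source covering exported functions, class methods, static methods, arrow functions, a default export, and a CommonJS export.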
+const source = [
+  'export function alpha() {}',
+  'class Foo {',
+  ' method() {}',
+  ' static bar() {}',
+  '}',
+  'const beta = () => {};',
+  'export default function gamma() {}',
+  'exports.qux = function() {};'
+].join('\n');
+
+const chunks = buildJsChunks(source) || [];
+const names = new Set(chunks.map((chunk) => chunk.name));
+
+const expect = (condition, message) => {
+  if (!condition) {
+    console.error(message);
+    process.exit(1);
+  }
+};
+
+expect(names.has('alpha'), 'Missing exported function chunk (alpha).');
+expect(names.has('Foo'), 'Missing class chunk (Foo).');
+expect(names.has('Foo.method'), 'Missing class method chunk (Foo.method).');
+expect(names.has('Foo.bar'), 'Missing class method chunk (Foo.bar).');
+expect(names.has('beta'), 'Missing arrow function chunk (beta).');
+expect(names.has('gamma'), 'Missing default function chunk (gamma).');
+expect(names.has('exports.qux') || names.has('qux'), 'Missing assignment function chunk (exports.qux).');
+
+console.log('JS chunking test passed.');
diff --git a/tests/lang/js-imports.test.js b/tests/lang/js-imports.test.js
new file mode 100644
index 000000000..0cb7d9431
--- /dev/null
+++ b/tests/lang/js-imports.test.js
@@ -0,0 +1,22 @@
+#!/usr/bin/env node
+import { collectImports } from '../../src/lang/javascript.js';
+
+const source = [
+  "import fs from 'fs';",
+  "import { join as joinPath } from 'path';",
+  "export * from 'module-a';",
+  "export { foo } from 'module-b';",
+  "const mod = require('module-c');",
+  "async function load() { return import('module-d'); }"
+].join('\n');
+
+const imports = collectImports(source);
+const sorted = imports.slice().sort();
+const expected = ['fs', 'path', 'module-a', 'module-b', 'module-c', 'module-d'].sort();
+
+if (JSON.stringify(sorted) !== JSON.stringify(expected)) {
+  console.error(`JS imports mismatch: ${JSON.stringify(sorted)} !== ${JSON.stringify(expected)}`);
+  process.exit(1);
+}
+
+console.log('JS imports test passed.');
diff --git a/tests/lang/js-relations.test.js b/tests/lang/js-relations.test.js
new file mode 100644
index 000000000..c3379b96f
--- /dev/null
+++ b/tests/lang/js-relations.test.js
@@ -0,0 +1,34 @@
+#!/usr/bin/env node
+import { buildCodeRelations } from '../../src/lang/javascript.js';
+
+const source = [
+  "import { readFile } from 'fs';",
+  'export function run(path) {',
+  ' return readFile(path);',
+  '}',
+  'const local = () => run("x");',
+  'module.exports = { run };'
+].join('\n');
+
+const rel = buildCodeRelations(source, 'sample.js', { fs: ['fs.js'] }) || {};
+const calls = Array.isArray(rel.calls) ? rel.calls : [];
+const imports = Array.isArray(rel.imports) ? rel.imports : [];
+const exports = Array.isArray(rel.exports) ?
rel.exports : []; + +const hasCall = calls.some(([from, to]) => from === 'run' && to === 'readFile'); +if (!hasCall) { + console.error(`Missing call relation from run -> readFile: ${JSON.stringify(calls)}`); + process.exit(1); +} + +if (!imports.includes('fs')) { + console.error(`Missing import for fs: ${JSON.stringify(imports)}`); + process.exit(1); +} + +if (!exports.includes('run') || !exports.includes('default')) { + console.error(`Missing exports for run/default: ${JSON.stringify(exports)}`); + process.exit(1); +} + +console.log('JS relations test passed.'); diff --git a/tests/lang/python-heuristic-chunking.test.js b/tests/lang/python-heuristic-chunking.test.js new file mode 100644 index 000000000..08ab79189 --- /dev/null +++ b/tests/lang/python-heuristic-chunking.test.js @@ -0,0 +1,40 @@ +#!/usr/bin/env node +import { buildPythonHeuristicChunks } from '../../src/lang/python.js'; + +const sample = [ + 'class Foo:', + ' def method(self):', + ' pass', + '', + 'def top():', + ' pass', + '', + 'async def later():', + ' pass' +].join('\n'); + +const chunks = buildPythonHeuristicChunks(sample) || []; +const byName = Object.fromEntries(chunks.map((chunk) => [chunk.name, chunk])); + +const expect = (condition, message) => { + if (!condition) { + console.error(message); + process.exit(1); + } +}; + +expect(byName.Foo, 'Missing class chunk for Foo.'); +expect(byName['Foo.method'], 'Missing method chunk for Foo.method.'); +expect(byName.top, 'Missing function chunk for top.'); +expect(byName.later, 'Missing function chunk for later.'); + +expect(byName.Foo.meta.startLine === 1, 'Foo startLine mismatch.'); +expect(byName.Foo.meta.endLine === 5, 'Foo endLine mismatch.'); +expect(byName['Foo.method'].meta.startLine === 2, 'Foo.method startLine mismatch.'); +expect(byName['Foo.method'].meta.endLine === 5, 'Foo.method endLine mismatch.'); +expect(byName.top.meta.startLine === 5, 'top startLine mismatch.'); +expect(byName.top.meta.endLine === 8, 'top endLine mismatch.'); +expect(byName.later.meta.startLine === 8, 'later startLine mismatch.'); +expect(byName.later.meta.endLine === 9, 'later endLine mismatch.'); + +console.log('Python heuristic chunking test passed.'); diff --git a/tests/lang/python-imports.test.js b/tests/lang/python-imports.test.js new file mode 100644 index 000000000..8c94eb682 --- /dev/null +++ b/tests/lang/python-imports.test.js @@ -0,0 +1,37 @@ +#!/usr/bin/env node +import { collectPythonImports } from '../../src/lang/python.js'; + +const source = [ + 'import os, sys as system', + 'import json', + 'from collections import defaultdict, namedtuple as nt', + 'from foo.bar import Baz as Qux, Quux', + '# from ignored import nope' +].join('\n'); + +const { imports, usages } = collectPythonImports(source); +const sorted = (items) => items.slice().sort(); + +const expectSet = (label, actual, expected) => { + const actualSorted = sorted(actual); + const expectedSorted = sorted(expected); + const actualText = JSON.stringify(actualSorted); + const expectedText = JSON.stringify(expectedSorted); + if (actualText !== expectedText) { + console.error(`${label} mismatch: ${actualText} !== ${expectedText}`); + process.exit(1); + } +}; + +expectSet('imports', imports, ['os', 'sys', 'json', 'collections', 'foo.bar']); +expectSet('usages', usages, [ + 'system', + 'defaultdict', + 'namedtuple', + 'nt', + 'Baz', + 'Qux', + 'Quux' +]); + +console.log('Python imports test passed.'); diff --git a/tests/lang/python-pool.test.js b/tests/lang/python-pool.test.js new file mode 100644 index 000000000..c2a72491f 
--- /dev/null
+++ b/tests/lang/python-pool.test.js
@@ -0,0 +1,32 @@
+#!/usr/bin/env node
+import { getPythonAst, shutdownPythonAstPool } from '../../src/lang/python.js';
+import { findPythonExecutable } from '../../src/lang/python/executable.js';
+
+const sample = 'def add(a: int, b: int) -> int:\n return a + b\n';
+const originalPath = process.env.PATH;
+process.env.PATH = '';
+
+const pythonBin = await findPythonExecutable();
+if (pythonBin) {
+  const ast = await getPythonAst(sample, null, {
+    pythonAst: { workerCount: 1, maxWorkers: 1, taskTimeoutMs: 5000 }
+  });
+  if (!ast || !Array.isArray(ast.defs)) {
+    console.error('Expected AST payload when python is available.');
+    process.exit(1);
+  }
+} else {
+  const ast = await getPythonAst(sample, null, {
+    pythonAst: { workerCount: 1, maxWorkers: 1, taskTimeoutMs: 5000 }
+  });
+  if (ast !== null) {
+    console.error('Expected null AST when python is not available.');
+    process.exit(1);
+  }
+}
+
+shutdownPythonAstPool();
+shutdownPythonAstPool();
+process.env.PATH = originalPath;
+
+console.log('Python pool test passed.');
diff --git a/tests/language-fidelity.js b/tests/language-fidelity.js
index 2e3c70e7f..74ce5564c 100644
--- a/tests/language-fidelity.js
+++ b/tests/language-fidelity.js
@@ -16,10 +16,12 @@ await fsPromises.mkdir(cacheRoot, { recursive: true });
 const env = {
   ...process.env,
   PAIROFCLEATS_CACHE_ROOT: cacheRoot,
-  PAIROFCLEATS_EMBEDDINGS: 'stub'
+  PAIROFCLEATS_EMBEDDINGS: 'stub',
+  PAIROFCLEATS_WORKER_POOL: 'off'
 };
 process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot;
 process.env.PAIROFCLEATS_EMBEDDINGS = 'stub';
+process.env.PAIROFCLEATS_WORKER_POOL = 'off';
 const repoArgs = ['--repo', fixtureRoot];
 
 function run(args, label) {
@@ -69,19 +71,103 @@ if (!fs.existsSync(chunkMetaPath)) {
 }
 const chunkMeta = JSON.parse(fs.readFileSync(chunkMetaPath, 'utf8'));
+const fileMetaPath = path.join(codeDir, 'file_meta.json');
+const fileMeta = fs.existsSync(fileMetaPath)
+  ? JSON.parse(fs.readFileSync(fileMetaPath, 'utf8'))
+  : [];
+const failures = [];
+const extractPostings = (payload) => {
+  if (!payload || typeof payload !== 'object') return [];
+  if (Array.isArray(payload.postings)) return payload.postings;
+  if (Array.isArray(payload.arrays?.postings)) return payload.arrays.postings;
+  return [];
+};
+const validateTokenPostings = (payload, label) => {
+  const postings = extractPostings(payload);
+  let badEntry = null;
+  for (let i = 0; i < postings.length; i += 1) {
+    const list = postings[i];
+    if (!Array.isArray(list)) continue;
+    for (let j = 0; j < list.length; j += 1) {
+      const entry = list[j];
+      if (!Array.isArray(entry)) continue;
+      const count = entry[1];
+      if (!Number.isInteger(count)) {
+        badEntry = { i, j, count };
+        break;
+      }
+    }
+    if (badEntry) break;
+  }
+  if (badEntry) {
+    const labelSuffix = label ?
` (${label})` : ''; + failures.push(`Token postings contain non-integer counts${labelSuffix} at ${badEntry.i}/${badEntry.j}: ${badEntry.count}`); + } +}; +const tokenPostingsPath = path.join(codeDir, 'token_postings.json'); +const tokenPostingsMetaPath = path.join(codeDir, 'token_postings.meta.json'); +if (fs.existsSync(tokenPostingsPath)) { + try { + const tokenPostings = JSON.parse(fs.readFileSync(tokenPostingsPath, 'utf8')); + validateTokenPostings(tokenPostings, 'token_postings.json'); + } catch { + failures.push('Token postings check failed: invalid JSON payload.'); + } +} else if (fs.existsSync(tokenPostingsMetaPath)) { + try { + const tokenMeta = JSON.parse(fs.readFileSync(tokenPostingsMetaPath, 'utf8')); + const parts = Array.isArray(tokenMeta?.fields?.parts) ? tokenMeta.fields.parts : []; + if (!parts.length) { + failures.push('Token postings check failed: sharded metadata missing parts list.'); + } else { + for (const part of parts) { + const partPath = path.join(codeDir, part); + if (!fs.existsSync(partPath)) { + failures.push(`Token postings shard missing: ${partPath}`); + continue; + } + try { + const shard = JSON.parse(fs.readFileSync(partPath, 'utf8')); + validateTokenPostings(shard, part); + } catch { + failures.push(`Token postings shard check failed: invalid JSON payload in ${part}.`); + } + } + } + } catch { + failures.push('Token postings check failed: invalid sharded metadata payload.'); + } +} +const fileById = new Map( + (Array.isArray(fileMeta) ? fileMeta : []).map((entry) => [entry.id, entry.file]) +); +const resolveChunkFile = (chunk) => chunk?.file || fileById.get(chunk?.fileId) || null; +const fileRelationsPath = path.join(codeDir, 'file_relations.json'); +let fileRelations = null; +if (fs.existsSync(fileRelationsPath)) { + try { + const raw = JSON.parse(fs.readFileSync(fileRelationsPath, 'utf8')); + if (Array.isArray(raw)) { + fileRelations = new Map(); + raw.forEach((entry) => { + if (entry?.file) fileRelations.set(entry.file, entry.relations || null); + }); + } + } catch {} +} +const getFileRelations = (file) => (fileRelations?.get(file) || null); function findChunk(match) { return chunkMeta.find((chunk) => { - if (!chunk || !chunk.file) return false; - if (match.file && chunk.file !== match.file) return false; + const file = resolveChunkFile(chunk); + if (!chunk || !file) return false; + if (match.file && file !== match.file) return false; if (match.kind && chunk.kind !== match.kind) return false; if (match.nameIncludes && !String(chunk.name || '').includes(match.nameIncludes)) return false; return true; }); } -const failures = []; - const branchSearch = runSearch( [searchPath, 'load', '--json', '--mode', 'code', '--branches', '1', '--no-ann'], 'search (branches filter)' @@ -172,6 +258,30 @@ if (asyncPayload) { } } +const fileRegexSearch = runSearch( + [searchPath, 'buildAliases', '--json', '--mode', 'code', '--file', '/javascript_advanced\\.js$/', '--no-ann'], + 'search (file regex filter)' +); +let fileRegexPayload = null; +try { + fileRegexPayload = JSON.parse(fileRegexSearch); +} catch { + failures.push('Search file regex filter failed: invalid JSON output.'); +} +if (fileRegexPayload) { + const fileRegexHits = fileRegexPayload.code || []; + if (!fileRegexHits.length) { + failures.push('Search file regex filter failed: no results for javascript_advanced.js.'); + } else { + const matches = fileRegexHits.every((hit) => + String(hit.file || '').includes('javascript_advanced.js') + ); + if (!matches) { + failures.push('Search file regex filter failed: 
returned non-matching files.'); + } + } +} + const aliasChunk = findChunk({ file: 'src/javascript_advanced.js', nameIncludes: 'buildAliases' }); if (!aliasChunk) { failures.push('Missing JavaScript alias chunk (buildAliases).'); @@ -186,6 +296,27 @@ if (!aliasChunk) { } } +const goDocChunk = findChunk({ file: 'src/go_advanced.go', kind: 'StructDeclaration', nameIncludes: 'Widget' }); +if (!goDocChunk) { + failures.push('Missing Go struct chunk (Widget).'); +} else if (!String(goDocChunk.docmeta?.doc || '').includes('Widget holds a name')) { + failures.push('Go docstring missing for Widget struct.'); +} + +const perlDocChunk = findChunk({ file: 'src/perl_advanced.pl', kind: 'FunctionDeclaration', nameIncludes: 'greet' }); +if (!perlDocChunk) { + failures.push('Missing Perl function chunk (greet).'); +} else if (!String(perlDocChunk.docmeta?.doc || '').includes('Greets a caller')) { + failures.push('Perl docstring missing for greet.'); +} + +const sqlDocChunk = findChunk({ file: 'src/sql_advanced.sql', kind: 'TableDeclaration', nameIncludes: 'widgets' }); +if (!sqlDocChunk) { + failures.push('Missing SQL table chunk (widgets).'); +} else if (!String(sqlDocChunk.docmeta?.doc || '').includes('Widget table')) { + failures.push('SQL docstring missing for widgets.'); +} + const riskChunk = findChunk({ file: 'src/javascript_risk.js', nameIncludes: 'runCommand' }); if (!riskChunk) { failures.push('Missing JavaScript risk chunk (runCommand).'); @@ -311,7 +442,7 @@ if (pythonAvailable) { } const jsWidgetClass = chunkMeta.find((chunk) => { - if (!chunk || chunk.file !== 'src/javascript_advanced.js') return false; + if (!chunk || resolveChunkFile(chunk) !== 'src/javascript_advanced.js') return false; if (chunk.name !== 'Widget') return false; return chunk.kind === 'ClassDeclaration' || chunk.kind === 'ExportedClass' || @@ -470,7 +601,8 @@ const javaMethod = findChunk({ file: 'src/java_advanced.java', kind: 'MethodDecl if (!javaMethod) { failures.push('Missing Java method chunk (Box.add).'); } else { - const imports = javaMethod.codeRelations?.imports || []; + const javaFile = resolveChunkFile(javaMethod); + const imports = javaMethod.codeRelations?.imports || getFileRelations(javaFile)?.imports || []; if (!imports.some((imp) => imp === 'java.util.List')) { failures.push('Java import capture missing java.util.List.'); } @@ -510,7 +642,7 @@ if (!shellFunc) { } const tsClass = chunkMeta.find((chunk) => - chunk.file === 'src/typescript_advanced.ts' && + resolveChunkFile(chunk) === 'src/typescript_advanced.ts' && chunk.kind === 'ClassDeclaration' && chunk.name === 'Widget' ); @@ -537,6 +669,10 @@ const tsAlias = findChunk({ file: 'src/typescript_advanced.ts', kind: 'FunctionD if (!tsAlias) { failures.push('Missing TypeScript alias chunk (buildWidgetAliases).'); } else { + const tsAliases = tsAlias.docmeta?.dataflow?.aliases || []; + if (!tsAliases.includes('name=label') || !tsAliases.includes('copy=items')) { + failures.push('TypeScript alias tracking missing expected aliases for buildWidgetAliases.'); + } const inferredParams = tsAlias.docmeta?.inferredTypes?.params?.label || []; if (!inferredParams.some((entry) => entry.type === 'string')) { failures.push('TypeScript inferredTypes missing string for label param.'); @@ -568,6 +704,10 @@ const rubyMethod = findChunk({ file: 'src/ruby_advanced.rb', kind: 'MethodDeclar if (!rubyMethod) { failures.push('Missing Ruby method chunk (Widget.render).'); } +const gemfileChunk = findChunk({ file: 'src/Gemfile', kind: 'MethodDeclaration', nameIncludes: 'build_widget' 
}); +if (!gemfileChunk) { + failures.push('Missing Gemfile Ruby chunk (build_widget).'); +} const phpMethod = findChunk({ file: 'src/php_advanced.php', kind: 'MethodDeclaration', nameIncludes: 'Widget.render' }); if (!phpMethod) { @@ -582,6 +722,13 @@ if (!luaMethod) { const sqlTable = findChunk({ file: 'src/sql_advanced.sql', kind: 'TableDeclaration', nameIncludes: 'widgets' }); if (!sqlTable) { failures.push('Missing SQL table chunk (widgets).'); +} else { + if (!Array.isArray(sqlTable.docmeta?.dataflow?.reads)) { + failures.push('SQL dataflow missing for widgets.'); + } + if (typeof sqlTable.docmeta?.controlFlow?.branches !== 'number') { + failures.push('SQL control flow missing for widgets.'); + } } const pgTable = findChunk({ file: 'src/sql_postgres.psql', kind: 'TableDeclaration', nameIncludes: 'pg_widgets' }); @@ -605,6 +752,101 @@ if (!sqliteTable) { failures.push('SQLite dialect metadata missing for sqlite_widgets.'); } +const dockerChunk = findChunk({ file: 'src/Dockerfile', nameIncludes: 'FROM' }); +if (!dockerChunk) { + failures.push('Missing Dockerfile chunk (FROM).'); +} + +const makeChunk = findChunk({ file: 'src/Makefile', nameIncludes: 'build' }); +if (!makeChunk) { + failures.push('Missing Makefile chunk (build).'); +} + +const protoChunk = findChunk({ file: 'src/schema.proto', nameIncludes: 'Widget' }); +if (!protoChunk) { + failures.push('Missing Protobuf chunk (Widget).'); +} + +const graphqlChunk = findChunk({ file: 'src/schema.graphql', nameIncludes: 'Widget' }); +if (!graphqlChunk) { + failures.push('Missing GraphQL chunk (Widget).'); +} + +const cmakeChunk = findChunk({ file: 'src/CMakeLists.txt', nameIncludes: 'add_executable' }); +if (!cmakeChunk) { + failures.push('Missing CMake chunk (add_executable).'); +} + +const bazelChunk = findChunk({ file: 'src/BUILD', nameIncludes: 'widget_lib' }); +if (!bazelChunk) { + failures.push('Missing Bazel chunk (widget_lib).'); +} + +const workspaceChunk = findChunk({ file: 'src/WORKSPACE', nameIncludes: 'workspace' }); +if (!workspaceChunk) { + failures.push('Missing Bazel WORKSPACE chunk (workspace).'); +} + +const starlarkChunk = findChunk({ file: 'src/defs.bzl', nameIncludes: 'widget_rule' }); +if (!starlarkChunk) { + failures.push('Missing Starlark chunk (widget_rule).'); +} + +const nixChunk = findChunk({ file: 'src/default.nix', nameIncludes: 'widget' }); +if (!nixChunk) { + failures.push('Missing Nix chunk (widget).'); +} + +const dartChunk = findChunk({ file: 'src/widget.dart', nameIncludes: 'Widget' }); +if (!dartChunk) { + failures.push('Missing Dart chunk (Widget).'); +} + +const scalaChunk = findChunk({ file: 'src/Widget.scala', nameIncludes: 'WidgetFactory' }); +if (!scalaChunk) { + failures.push('Missing Scala chunk (WidgetFactory).'); +} + +const groovyChunk = findChunk({ file: 'src/Widget.groovy', nameIncludes: 'buildWidget' }); +if (!groovyChunk) { + failures.push('Missing Groovy chunk (buildWidget).'); +} + +const rChunk = findChunk({ file: 'src/widget.r', nameIncludes: 'build_widget' }); +if (!rChunk) { + failures.push('Missing R chunk (build_widget).'); +} + +const juliaChunk = findChunk({ file: 'src/widget.jl', nameIncludes: 'build_widget' }); +if (!juliaChunk) { + failures.push('Missing Julia chunk (build_widget).'); +} + +const handlebarsChunk = findChunk({ file: 'src/widget.hbs', nameIncludes: 'widgets' }); +if (!handlebarsChunk) { + failures.push('Missing Handlebars chunk (widgets).'); +} + +const mustacheChunk = findChunk({ file: 'src/widget.mustache', nameIncludes: 'widget' }); +if (!mustacheChunk) 
{
+  failures.push('Missing Mustache chunk (widget).');
+}
+
+const jinjaChunk = findChunk({ file: 'src/widget.jinja2', nameIncludes: 'content' });
+if (!jinjaChunk) {
+  failures.push('Missing Jinja chunk (content).');
+}
+
+const djangoChunk = findChunk({ file: 'src/widget.djhtml', nameIncludes: 'body' });
+if (!djangoChunk) {
+  failures.push('Missing Django template chunk (body).');
+}
+
+const razorChunk = findChunk({ file: 'src/widget.razor', nameIncludes: 'page' });
+if (!razorChunk) {
+  failures.push('Missing Razor chunk (page).');
+}
+
 if (failures.length) {
   failures.forEach((msg) => console.error(msg));
   process.exit(1);
diff --git a/tests/language-registry/collectors.test.js b/tests/language-registry/collectors.test.js
new file mode 100644
index 000000000..0ce762ddc
--- /dev/null
+++ b/tests/language-registry/collectors.test.js
@@ -0,0 +1,133 @@
+#!/usr/bin/env node
+import { collectCmakeImports } from '../../src/index/language-registry/import-collectors/cmake.js';
+import { collectDartImports } from '../../src/index/language-registry/import-collectors/dart.js';
+import { collectDockerfileImports } from '../../src/index/language-registry/import-collectors/dockerfile.js';
+import { collectGraphqlImports } from '../../src/index/language-registry/import-collectors/graphql.js';
+import { collectGroovyImports } from '../../src/index/language-registry/import-collectors/groovy.js';
+import { collectHandlebarsImports } from '../../src/index/language-registry/import-collectors/handlebars.js';
+import { collectJinjaImports } from '../../src/index/language-registry/import-collectors/jinja.js';
+import { collectJuliaImports } from '../../src/index/language-registry/import-collectors/julia.js';
+import { collectMakefileImports } from '../../src/index/language-registry/import-collectors/makefile.js';
+import { collectMustacheImports } from '../../src/index/language-registry/import-collectors/mustache.js';
+import { collectNixImports } from '../../src/index/language-registry/import-collectors/nix.js';
+import { collectProtoImports } from '../../src/index/language-registry/import-collectors/proto.js';
+import { collectRazorImports } from '../../src/index/language-registry/import-collectors/razor.js';
+import { collectRImports } from '../../src/index/language-registry/import-collectors/r.js';
+import { collectScalaImports } from '../../src/index/language-registry/import-collectors/scala.js';
+import { collectStarlarkImports } from '../../src/index/language-registry/import-collectors/starlark.js';
+
+const sort = (list) => list.slice().sort();
+const expectSet = (label, actual, expected) => {
+  const actualSorted = sort(actual);
+  const expectedSorted = sort(expected);
+  if (JSON.stringify(actualSorted) !== JSON.stringify(expectedSorted)) {
+    console.error(`${label} mismatch: ${JSON.stringify(actualSorted)} !== ${JSON.stringify(expectedSorted)}`);
+    process.exit(1);
+  }
+};
+
+const cases = [
+  {
+    label: 'dockerfile',
+    fn: collectDockerfileImports,
+    text: 'FROM node:18 AS base\nCOPY --from=base /src /dst',
+    expected: ['node:18', 'base']
+  },
+  {
+    label: 'makefile',
+    fn: collectMakefileImports,
+    text: 'include shared.mk\n-include local.mk',
+    expected: ['shared.mk', 'local.mk']
+  },
+  {
+    label: 'proto',
+    fn: collectProtoImports,
+    text: 'import \"foo.proto\";\nimport public \"bar.proto\";',
+    expected: ['foo.proto', 'bar.proto']
+  },
+  {
+    label: 'graphql',
+    fn: collectGraphqlImports,
+    text: '#import \"common.graphql\"',
+    expected: ['common.graphql']
+  },
+  {
+    label: 'cmake',
+    fn: collectCmakeImports,
+    text: 'include(foo)\nadd_subdirectory(bar)\nfind_package(Baz)',
+    expected: ['foo', 'bar', 'Baz']
+  },
+  {
+    label: 'starlark',
+    fn: collectStarlarkImports,
+    text: 'load(\"//path:target\", \"x\")',
+    expected: ['//path:target']
+  },
+  {
+    label: 'nix',
+    fn: collectNixImports,
+    text: 'import ./module.nix\ncallPackage ../pkg.nix {}',
+    expected: ['./module.nix', '../pkg.nix']
+  },
+  {
+    label: 'dart',
+    fn: collectDartImports,
+    text: "import 'package:foo/bar.dart';",
+    expected: ['package:foo/bar.dart']
+  },
+  {
+    label: 'scala',
+    fn: collectScalaImports,
+    text: 'import foo.bar.Baz',
+    expected: ['foo.bar.Baz']
+  },
+  {
+    label: 'groovy',
+    fn: collectGroovyImports,
+    text: 'import foo.bar.Baz',
+    expected: ['foo.bar.Baz']
+  },
+  {
+    label: 'r',
+    fn: collectRImports,
+    text: 'library(ggplot2)\nrequire(\"dplyr\")',
+    expected: ['ggplot2', 'dplyr']
+  },
+  {
+    label: 'julia',
+    fn: collectJuliaImports,
+    text: 'using Foo.Bar',
+    expected: ['Foo.Bar']
+  },
+  {
+    label: 'handlebars',
+    fn: collectHandlebarsImports,
+    text: '{{> partial-name}}',
+    expected: ['partial-name']
+  },
+  {
+    label: 'mustache',
+    fn: collectMustacheImports,
+    text: '{{> other}}',
+    expected: ['other']
+  },
+  {
+    label: 'jinja',
+    fn: collectJinjaImports,
+    text: '{% extends \"base.html\" %}',
+    expected: ['base.html']
+  },
+  {
+    label: 'razor',
+    fn: collectRazorImports,
+    text: '@using System.Text',
+    expected: ['System.Text']
+  }
+];
+
+for (const testCase of cases) {
+  const actual = testCase.fn(testCase.text);
+  expectSet(testCase.label, actual, testCase.expected);
+}
+
+console.log('Language registry collectors test passed.');
diff --git a/tests/language-registry/selection.test.js b/tests/language-registry/selection.test.js
new file mode 100644
index 000000000..552c9d6ef
--- /dev/null
+++ b/tests/language-registry/selection.test.js
@@ -0,0 +1,22 @@
+#!/usr/bin/env node
+import { getLanguageForFile } from '../../src/index/language-registry.js';
+
+const expectId = (ext, relPath, expected) => {
+  const lang = getLanguageForFile(ext, relPath);
+  const actual = lang ?
lang.id : null; + if (actual !== expected) { + console.error(`Language mismatch for ${relPath || ext}: ${actual} !== ${expected}`); + process.exit(1); + } +}; + +expectId('.js', 'src/app.js', 'javascript'); +expectId('.ts', 'src/app.ts', 'typescript'); +expectId('.tsx', 'src/App.tsx', 'typescript'); +expectId('.py', 'src/app.py', 'python'); +expectId('.rs', 'src/lib.rs', 'rust'); +expectId('.go', 'src/main.go', 'go'); +expectId('.hbs', 'templates/view.hbs', 'handlebars'); +expectId('.dockerfile', 'Dockerfile.dockerfile', 'dockerfile'); + +console.log('Language registry selection test passed.'); diff --git a/tests/lmdb-backend.js b/tests/lmdb-backend.js new file mode 100644 index 000000000..1555a97c4 --- /dev/null +++ b/tests/lmdb-backend.js @@ -0,0 +1,103 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { Unpackr } from 'msgpackr'; +import { LMDB_META_KEYS, LMDB_SCHEMA_VERSION } from '../src/storage/lmdb/schema.js'; +import { resolveLmdbPaths } from '../tools/dict-utils.js'; + +let open = null; +try { + ({ open } = await import('lmdb')); +} catch (err) { + console.error(`lmdb missing: ${err?.message || err}`); + process.exit(1); +} + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'lmdb-backend'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.writeFile(path.join(repoRoot, 'alpha.js'), 'const alpha = 1;\\n'); +await fsPromises.writeFile(path.join(repoRoot, 'beta.js'), 'const beta = 2;\\n'); +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify({ indexing: { treeSitter: { enabled: false } } }, null, 2) +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + +const runNode = (label, args) => { + const result = spawnSync(process.execPath, args, { cwd: repoRoot, env, stdio: 'inherit' }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 1); + } +}; + +runNode('build_index', [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot]); +runNode('build_lmdb_index', [path.join(root, 'tools', 'build-lmdb-index.js'), '--mode', 'code', '--repo', repoRoot]); + +const lmdbPaths = resolveLmdbPaths(repoRoot, {}); +const dbPath = lmdbPaths.codePath; +const dataPath = path.join(dbPath, 'data.mdb'); +if (!fs.existsSync(dataPath)) { + console.error(`Expected LMDB data file to exist at ${dataPath}`); + process.exit(1); +} + +const db = open({ path: dbPath, readOnly: true }); +const unpackr = new Unpackr(); +const decode = (value) => (value == null ? 
null : unpackr.unpack(value)); +const version = decode(db.get(LMDB_META_KEYS.schemaVersion)); +if (version !== LMDB_SCHEMA_VERSION) { + console.error(`Expected LMDB schema version ${LMDB_SCHEMA_VERSION}, got ${version}`); + process.exit(1); +} +const mode = decode(db.get(LMDB_META_KEYS.mode)); +if (mode !== 'code') { + console.error(`Expected LMDB mode code, got ${mode}`); + process.exit(1); +} +const chunkCount = Number(decode(db.get(LMDB_META_KEYS.chunkCount)) || 0); +if (!Number.isFinite(chunkCount) || chunkCount <= 0) { + console.error('Expected LMDB chunkCount to be positive.'); + process.exit(1); +} +db.close(); + +const searchResult = spawnSync( + process.execPath, + [path.join(root, 'search.js'), 'alpha', '--json', '--backend', 'lmdb', '--no-ann', '--repo', repoRoot], + { encoding: 'utf8', env } +); +if (searchResult.status !== 0) { + console.error('search.js failed for LMDB backend test.'); + process.exit(searchResult.status ?? 1); +} +const output = String(searchResult.stdout || '').trim(); +let payload = null; +try { + payload = JSON.parse(output); +} catch { + console.error('Failed to parse LMDB search JSON output.'); + process.exit(1); +} +if (payload.backend !== 'lmdb') { + console.error(`Expected backend=lmdb, got ${payload.backend}`); + process.exit(1); +} + +console.log('lmdb backend test passed'); diff --git a/tests/lmdb-corruption.js b/tests/lmdb-corruption.js new file mode 100644 index 000000000..ec6d54164 --- /dev/null +++ b/tests/lmdb-corruption.js @@ -0,0 +1,102 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { LMDB_META_KEYS } from '../src/storage/lmdb/schema.js'; +import { loadUserConfig, resolveLmdbPaths } from '../tools/dict-utils.js'; + +let open = null; +try { + ({ open } = await import('lmdb')); +} catch (err) { + console.error(`lmdb missing: ${err?.message || err}`); + process.exit(1); +} + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const tempRoot = path.join(root, 'tests', '.cache', 'lmdb-corruption'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); +await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true }); +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify({ sqlite: { use: false } }, null, 2) +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + +const run = (args, label, options = {}) => { + const result = spawnSync(process.execPath, args, { + cwd: repoRoot, + env, + ...options + }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 
1); + } + return result; +}; + +run( + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + 'build index', + { stdio: 'inherit' } +); +run( + [path.join(root, 'tools', 'build-lmdb-index.js'), '--mode', 'all', '--repo', repoRoot], + 'build lmdb index', + { stdio: 'inherit' } +); + +const userConfig = loadUserConfig(repoRoot); +const lmdbPaths = resolveLmdbPaths(repoRoot, userConfig); +const db = open({ path: lmdbPaths.codePath, readOnly: false }); +if (typeof db.removeSync === 'function') { + db.removeSync(LMDB_META_KEYS.schemaVersion); +} else { + db.remove(LMDB_META_KEYS.schemaVersion); +} +db.close(); + +const report = run( + [path.join(root, 'tools', 'report-artifacts.js'), '--json', '--repo', repoRoot], + 'report artifacts', + { encoding: 'utf8' } +); + +let payload = null; +try { + payload = JSON.parse(report.stdout || '{}'); +} catch { + console.error('Failed to parse report-artifacts JSON output.'); + process.exit(1); +} + +if (payload?.corruption?.ok !== false) { + console.error('Expected corruption report ok=false after LMDB tamper.'); + process.exit(1); +} +if (payload?.corruption?.lmdb?.ok !== false) { + console.error('Expected LMDB corruption report ok=false.'); + process.exit(1); +} +const issues = Array.isArray(payload?.corruption?.issues) ? payload.corruption.issues : []; +if (!issues.some((issue) => issue.includes('lmdb/code'))) { + console.error('Expected LMDB corruption issues for code db.'); + process.exit(1); +} + +console.log('lmdb corruption test passed'); diff --git a/tests/lmdb-report-artifacts.js b/tests/lmdb-report-artifacts.js new file mode 100644 index 000000000..50187fc7d --- /dev/null +++ b/tests/lmdb-report-artifacts.js @@ -0,0 +1,83 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const tempRoot = path.join(root, 'tests', '.cache', 'lmdb-report-artifacts'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); +await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true }); +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify({ sqlite: { use: false } }, null, 2) +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + +const run = (args, label, options = {}) => { + const result = spawnSync(process.execPath, args, { + cwd: repoRoot, + env, + ...options + }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 
1); + } + return result; +}; + +run( + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + 'build index', + { stdio: 'inherit' } +); + +run( + [path.join(root, 'tools', 'build-lmdb-index.js'), '--mode', 'all', '--repo', repoRoot], + 'build lmdb index', + { stdio: 'inherit' } +); + +const report = run( + [path.join(root, 'tools', 'report-artifacts.js'), '--json', '--repo', repoRoot], + 'report artifacts', + { encoding: 'utf8' } +); + +let payload = null; +try { + payload = JSON.parse(report.stdout || '{}'); +} catch { + console.error('Failed to parse report-artifacts JSON output.'); + process.exit(1); +} + +const lmdbThroughput = payload?.throughput?.lmdb; +if (!lmdbThroughput?.code || !Number.isFinite(lmdbThroughput.code.chunksPerSec)) { + console.error('LMDB code throughput missing or invalid in report-artifacts.'); + process.exit(1); +} +if (!lmdbThroughput?.prose || !Number.isFinite(lmdbThroughput.prose.chunksPerSec)) { + console.error('LMDB prose throughput missing or invalid in report-artifacts.'); + process.exit(1); +} +if (payload?.corruption?.lmdb?.ok !== true) { + console.error('LMDB corruption report expected ok=true.'); + process.exit(1); +} + +console.log('lmdb report artifacts test passed'); diff --git a/tests/lsif-ingest.js b/tests/lsif-ingest.js new file mode 100644 index 000000000..16629e606 --- /dev/null +++ b/tests/lsif-ingest.js @@ -0,0 +1,48 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'lsif-ingest'); +const repoRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const inputPath = path.join(root, 'tests', 'fixtures', 'lsif', 'dump.lsif'); +const outPath = path.join(tempRoot, 'lsif.jsonl'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); + +const result = spawnSync( + process.execPath, + [path.join(root, 'tools', 'lsif-ingest.js'), '--repo', repoRoot, '--input', inputPath, '--out', outPath, '--json'], + { encoding: 'utf8' } +); +if (result.status !== 0) { + console.error(result.stderr || result.stdout || 'lsif-ingest failed'); + process.exit(result.status ?? 
1); +} + +if (!fs.existsSync(outPath)) { + console.error('lsif output not found'); + process.exit(1); +} + +const lines = fs.readFileSync(outPath, 'utf8').trim().split(/\r?\n/).filter(Boolean); +assert.ok(lines.length >= 1, 'expected lsif output lines'); + +const first = JSON.parse(lines[0]); +assert.equal(first.file, 'src/sample.ts'); +assert.equal(first.role, 'definition'); +assert.equal(first.startLine, 2); +assert.equal(first.language, 'typescript'); + +const metaPath = `${outPath}.meta.json`; +const meta = JSON.parse(fs.readFileSync(metaPath, 'utf8')); +assert.ok(meta.stats.vertices >= 4); +assert.ok(meta.stats.edges >= 2); +assert.ok(meta.stats.definitions >= 1); +assert.ok(meta.stats.references >= 1); + +console.log('lsif ingest test passed'); diff --git a/tests/lsp-shutdown.js b/tests/lsp-shutdown.js new file mode 100644 index 000000000..0009ba2fa --- /dev/null +++ b/tests/lsp-shutdown.js @@ -0,0 +1,24 @@ +#!/usr/bin/env node +import path from 'node:path'; +import { pathToFileURL } from 'node:url'; +import { createLspClient } from '../src/integrations/tooling/lsp/client.js'; + +const root = process.cwd(); +const serverPath = path.join(root, 'tests', 'fixtures', 'lsp', 'stub-lsp-server.js'); +const logs = []; +const client = createLspClient({ + cmd: process.execPath, + args: [serverPath, '--exit-on-shutdown'], + log: (message) => logs.push(message) +}); + +await client.initialize({ rootUri: pathToFileURL(root).href }); +await client.shutdownAndExit(); +await new Promise((resolve) => setTimeout(resolve, 200)); +client.kill(); + +if (logs.some((line) => line.includes('ERR_STREAM_DESTROYED'))) { + throw new Error('LSP shutdown emitted ERR_STREAM_DESTROYED.'); +} + +console.log('LSP shutdown test passed'); diff --git a/tests/mcp-robustness.js b/tests/mcp-robustness.js new file mode 100644 index 000000000..b98ce06c0 --- /dev/null +++ b/tests/mcp-robustness.js @@ -0,0 +1,187 @@ +#!/usr/bin/env node +import { spawn } from 'node:child_process'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; + +const root = process.cwd(); +const serverPath = path.join(root, 'tools', 'mcp-server.js'); +const tempRoot = path.join(root, 'tests', '.cache', 'mcp-robustness'); +const queueCache = path.join(tempRoot, 'queue-cache'); +const timeoutCache = path.join(tempRoot, 'timeout-cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(queueCache, { recursive: true }); +await fsPromises.mkdir(timeoutCache, { recursive: true }); + +function encodeMessage(payload) { + const json = JSON.stringify(payload); + return `Content-Length: ${Buffer.byteLength(json, 'utf8')}\r\n\r\n${json}`; +} + +function createReader(stream) { + let buffer = Buffer.alloc(0); + const tryRead = () => { + const headerEnd = buffer.indexOf('\r\n\r\n'); + if (headerEnd === -1) return null; + const header = buffer.slice(0, headerEnd).toString('utf8'); + const match = header.match(/Content-Length:\s*(\d+)/i); + if (!match) { + buffer = buffer.slice(headerEnd + 4); + return null; + } + const length = parseInt(match[1], 10); + const total = headerEnd + 4 + length; + if (buffer.length < total) return null; + const body = buffer.slice(headerEnd + 4, total).toString('utf8'); + buffer = buffer.slice(total); + return JSON.parse(body); + }; + const notifications = []; + const readRaw = async () => { + const existing = tryRead(); + if (existing) return existing; + return new Promise((resolve) => { + const onData = (chunk) => { + buffer = Buffer.concat([buffer, chunk]); + const parsed 
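+          // attempt to parse a complete Content-Length frame after each chunk; keep listening until one arrives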
= tryRead(); + if (!parsed) return; + stream.off('data', onData); + resolve(parsed); + }; + stream.on('data', onData); + }); + }; + const readMessage = async () => { + while (true) { + const parsed = await readRaw(); + if (parsed && parsed.method && parsed.id === undefined) { + notifications.push(parsed); + continue; + } + return parsed; + } + }; + return { readMessage, notifications }; +} + +async function runQueueTest() { + const server = spawn(process.execPath, [serverPath], { + stdio: ['pipe', 'pipe', 'inherit'], + env: { + ...process.env, + PAIROFCLEATS_HOME: queueCache, + PAIROFCLEATS_CACHE_ROOT: queueCache, + PAIROFCLEATS_MCP_QUEUE_MAX: '1' + } + }); + const { readMessage } = createReader(server.stdout); + const timeout = setTimeout(() => { + console.error('MCP queue test timed out.'); + server.kill('SIGKILL'); + process.exit(1); + }, 30000); + const send = (payload) => server.stdin.write(encodeMessage(payload)); + + try { + send({ + jsonrpc: '2.0', + id: 1, + method: 'initialize', + params: { protocolVersion: '2024-11-05', capabilities: {} } + }); + await readMessage(); + + send({ + jsonrpc: '2.0', + id: 2, + method: 'tools/call', + params: { name: 'index_status', arguments: { repoPath: root } } + }); + send({ + jsonrpc: '2.0', + id: 3, + method: 'tools/call', + params: { name: 'index_status', arguments: { repoPath: root } } + }); + + const first = await readMessage(); + const second = await readMessage(); + const responses = [first, second]; + const overload = responses.find((msg) => msg?.error?.code === -32001); + if (!overload || overload.error?.data?.code !== 'QUEUE_OVERLOADED') { + throw new Error('Expected queue overload error response.'); + } + + send({ jsonrpc: '2.0', id: 4, method: 'shutdown' }); + await readMessage(); + send({ jsonrpc: '2.0', method: 'exit' }); + } catch (err) { + server.kill('SIGKILL'); + throw err; + } finally { + clearTimeout(timeout); + server.stdin.end(); + } +} + +async function runTimeoutTest() { + const server = spawn(process.execPath, [serverPath], { + stdio: ['pipe', 'pipe', 'inherit'], + env: { + ...process.env, + PAIROFCLEATS_HOME: timeoutCache, + PAIROFCLEATS_CACHE_ROOT: timeoutCache, + PAIROFCLEATS_MCP_TOOL_TIMEOUT_MS: '1' + } + }); + const { readMessage } = createReader(server.stdout); + const timeout = setTimeout(() => { + console.error('MCP timeout test timed out.'); + server.kill('SIGKILL'); + process.exit(1); + }, 30000); + const send = (payload) => server.stdin.write(encodeMessage(payload)); + + try { + send({ + jsonrpc: '2.0', + id: 10, + method: 'initialize', + params: { protocolVersion: '2024-11-05', capabilities: {} } + }); + await readMessage(); + + send({ + jsonrpc: '2.0', + id: 11, + method: 'tools/call', + params: { name: 'index_status', arguments: { repoPath: root } } + }); + const response = await readMessage(); + const payloadText = response.result?.content?.[0]?.text || ''; + const payload = JSON.parse(payloadText || '{}'); + if (!response.result?.isError || payload.code !== 'TOOL_TIMEOUT') { + throw new Error('Expected tool timeout error response.'); + } + + send({ jsonrpc: '2.0', id: 12, method: 'shutdown' }); + await readMessage(); + send({ jsonrpc: '2.0', method: 'exit' }); + } catch (err) { + server.kill('SIGKILL'); + throw err; + } finally { + clearTimeout(timeout); + server.stdin.end(); + } +} + +runQueueTest() + .then(runTimeoutTest) + .then(() => { + console.log('MCP robustness tests passed'); + }) + .catch((err) => { + console.error(err?.message || err); + process.exit(1); + }); diff --git a/tests/mcp-schema.js 
b/tests/mcp-schema.js new file mode 100644 index 000000000..6ca216122 --- /dev/null +++ b/tests/mcp-schema.js @@ -0,0 +1,180 @@ +#!/usr/bin/env node +import { spawn } from 'node:child_process'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { getToolDefs } from '../src/integrations/mcp/defs.js'; +import { stableStringify } from '../src/shared/stable-json.js'; +import { DEFAULT_MODEL_ID } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const serverPath = path.join(root, 'tools', 'mcp-server.js'); +const sampleRepo = path.join(root, 'tests', 'fixtures', 'sample'); +const tempRoot = path.join(root, 'tests', '.cache', 'mcp-schema'); +const cacheRoot = path.join(tempRoot, 'cache'); +const emptyRepo = path.join(tempRoot, 'empty'); +const snapshotPath = path.join(root, 'tests', 'fixtures', 'mcp', 'schema-snapshot.json'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); +await fsPromises.mkdir(emptyRepo, { recursive: true }); + +function encodeMessage(payload) { + const json = JSON.stringify(payload); + return `Content-Length: ${Buffer.byteLength(json, 'utf8')}\r\n\r\n${json}`; +} + +function createReader(stream) { + let buffer = Buffer.alloc(0); + const tryRead = () => { + const headerEnd = buffer.indexOf('\r\n\r\n'); + if (headerEnd === -1) return null; + const header = buffer.slice(0, headerEnd).toString('utf8'); + const match = header.match(/Content-Length:\s*(\d+)/i); + if (!match) { + buffer = buffer.slice(headerEnd + 4); + return null; + } + const length = parseInt(match[1], 10); + const total = headerEnd + 4 + length; + if (buffer.length < total) return null; + const body = buffer.slice(headerEnd + 4, total).toString('utf8'); + buffer = buffer.slice(total); + return JSON.parse(body); + }; + const notifications = []; + const readRaw = async () => { + const existing = tryRead(); + if (existing) return existing; + return new Promise((resolve) => { + const onData = (chunk) => { + buffer = Buffer.concat([buffer, chunk]); + const parsed = tryRead(); + if (!parsed) return; + stream.off('data', onData); + resolve(parsed); + }; + stream.on('data', onData); + }); + }; + const readMessage = async () => { + while (true) { + const parsed = await readRaw(); + if (parsed && parsed.method && parsed.id === undefined) { + notifications.push(parsed); + continue; + } + return parsed; + } + }; + return { readMessage, notifications }; +} + +const server = spawn(process.execPath, [serverPath], { + stdio: ['pipe', 'pipe', 'inherit'], + env: { + ...process.env, + PAIROFCLEATS_HOME: cacheRoot, + PAIROFCLEATS_CACHE_ROOT: cacheRoot + } +}); + +const { readMessage } = createReader(server.stdout); +const timeout = setTimeout(() => { + console.error('MCP schema test timed out.'); + server.kill('SIGKILL'); + process.exit(1); +}, 30000); + +function send(payload) { + server.stdin.write(encodeMessage(payload)); +} + +const shapeValue = (value) => { + if (Array.isArray(value)) { + return value.map((entry) => shapeValue(entry)); + } + if (value && typeof value === 'object') { + const out = {}; + for (const key of Object.keys(value).sort()) { + out[key] = shapeValue(value[key]); + } + return out; + } + if (value === null) return ''; + return `<${typeof value}>`; +}; + +const toolSchemaSnapshot = getToolDefs(DEFAULT_MODEL_ID).map((tool) => ({ + name: tool.name, + required: Array.isArray(tool.inputSchema?.required) + ? 
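+    // sort a copy of the required list so the snapshot is stable across declaration order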
[...tool.inputSchema.required].sort() + : [], + properties: Object.keys(tool.inputSchema?.properties || {}).sort() +})); + +async function run() { + send({ + jsonrpc: '2.0', + id: 1, + method: 'initialize', + params: { protocolVersion: '2024-11-05', capabilities: {} } + }); + await readMessage(); + + send({ + jsonrpc: '2.0', + id: 2, + method: 'tools/call', + params: { + name: 'index_status', + arguments: { repoPath: sampleRepo } + } + }); + const status = await readMessage(); + const statusText = status.result?.content?.[0]?.text || ''; + const statusPayload = JSON.parse(statusText || '{}'); + + send({ + jsonrpc: '2.0', + id: 3, + method: 'tools/call', + params: { + name: 'config_status', + arguments: { repoPath: emptyRepo } + } + }); + const configStatus = await readMessage(); + const configText = configStatus.result?.content?.[0]?.text || ''; + const configPayload = JSON.parse(configText || '{}'); + + send({ jsonrpc: '2.0', id: 4, method: 'shutdown' }); + await readMessage(); + send({ jsonrpc: '2.0', method: 'exit' }); + + return { + tools: toolSchemaSnapshot, + responses: { + index_status: shapeValue(statusPayload), + config_status: shapeValue(configPayload) + } + }; +} + +run() + .then(async (actual) => { + clearTimeout(timeout); + server.stdin.end(); + const expectedRaw = await fsPromises.readFile(snapshotPath, 'utf8'); + const expected = JSON.parse(expectedRaw); + if (stableStringify(actual) !== stableStringify(expected)) { + console.error('MCP schema snapshot mismatch.'); + process.exit(1); + } + console.log('MCP schema snapshot test passed'); + }) + .catch((err) => { + clearTimeout(timeout); + console.error(err?.message || err); + server.kill('SIGKILL'); + process.exit(1); + }); diff --git a/tests/metadata-v2.js b/tests/metadata-v2.js new file mode 100644 index 000000000..0a6886b49 --- /dev/null +++ b/tests/metadata-v2.js @@ -0,0 +1,53 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { buildMetaV2 } from '../src/index/metadata-v2.js'; + +const chunk = { + file: 'src/example.js', + ext: '.js', + start: 10, + end: 42, + startLine: 2, + endLine: 4, + kind: 'FunctionDeclaration', + name: 'makeWidget', + segment: { + segmentId: 'seg-1', + type: 'code', + languageId: 'javascript', + parentSegmentId: null, + embeddingContext: 'code' + } +}; + +const docmeta = { + signature: 'makeWidget(opts)', + params: ['opts'], + returnType: 'Widget', + inferredTypes: { + returns: [{ type: 'Widget', source: 'tooling', confidence: 0.9 }] + }, + risk: { + tags: ['command-exec'], + sources: [{ name: 'req.body' }], + sinks: [{ name: 'exec' }], + flows: [{ source: 'req.body', sink: 'exec', scope: 'local' }] + } +}; + +const meta = buildMetaV2({ + chunk, + docmeta, + toolInfo: { tool: 'pairofcleats', version: '0.0.0-test', configHash: 'deadbeef' } +}); + +assert.ok(meta, 'expected metaV2 output'); +assert.ok(meta.chunkId, 'expected metaV2 chunkId'); +assert.equal(meta.file, 'src/example.js'); +assert.equal(meta.segment?.segmentId, 'seg-1'); +assert.equal(meta.signature, 'makeWidget(opts)'); +assert.equal(meta.returns, 'Widget'); +assert.equal(meta.types?.tooling?.returns?.[0]?.type, 'Widget'); +assert.equal(meta.risk?.flows?.[0]?.sink, 'exec'); + +console.log('metadata v2 test passed'); diff --git a/tests/minhash-parity.js b/tests/minhash-parity.js new file mode 100644 index 000000000..74b7506e2 --- /dev/null +++ b/tests/minhash-parity.js @@ -0,0 +1,21 @@ +#!/usr/bin/env node +import { SimpleMinHash } from '../src/index/minhash.js'; +import { rankMinhash } from 
'../src/retrieval/rankers.js'; + +const tokens = ['alpha', 'beta', 'gamma', 'delta']; +const mh = new SimpleMinHash(); +tokens.forEach((token) => mh.update(token)); +const idx = { + minhash: { signatures: [mh.hashValues] }, + chunkMeta: [{ weight: 1 }] +}; +const results = rankMinhash(idx, tokens, 1); +if (!results.length || results[0].idx !== 0) { + console.error('minhash parity test failed: expected top hit for id 0'); + process.exit(1); +} +if (results[0].sim < 0.99) { + console.error(`minhash parity test failed: expected sim≈1, got ${results[0].sim}`); + process.exit(1); +} +console.log('minhash parity test passed'); diff --git a/tests/parity.js b/tests/parity.js index dccfa0da9..1c923b128 100644 --- a/tests/parity.js +++ b/tests/parity.js @@ -4,15 +4,29 @@ import fsSync from 'node:fs'; import path from 'node:path'; import { spawnSync } from 'node:child_process'; import { performance } from 'node:perf_hooks'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; import { getIndexDir, loadUserConfig, resolveSqlitePaths } from '../tools/dict-utils.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['ann', 'write-report', 'enforce'], - string: ['queries', 'out', 'search', 'sqlite-backend'], - alias: { n: 'top', q: 'queries' }, - default: { top: 5, limit: 0, 'sqlite-backend': 'sqlite' } -}); +const argv = createCli({ + scriptName: 'parity', + options: { + ann: { type: 'boolean', default: true }, + 'write-report': { type: 'boolean', default: false }, + enforce: { type: 'boolean', default: false }, + 'enforce-fts': { type: 'boolean', default: false }, + 'min-overlap': { type: 'number' }, + 'min-rank-corr': { type: 'number' }, + 'max-delta': { type: 'number' }, + 'min-overlap-single': { type: 'number' }, + queries: { type: 'string' }, + out: { type: 'string' }, + search: { type: 'string' }, + 'sqlite-backend': { type: 'string', default: 'sqlite' }, + top: { type: 'number', default: 5 }, + limit: { type: 'number', default: 0 } + }, + aliases: { n: 'top', q: 'queries' } +}).parse(); const root = process.cwd(); const repoArgs = ['--repo', root]; @@ -156,6 +170,18 @@ function hitScore(hit) { function summarizeMatch(memoryHits, sqliteHits) { const mem = memoryHits.slice(0, topN); const sql = sqliteHits.slice(0, topN); + if (!mem.length && !sql.length) { + return { + overlap: 1, + avgDelta: 0, + missingFromSqlite: [], + missingFromMemory: [], + rankCorr: null, + topMemory: [], + topSqlite: [], + zeroHits: true + }; + } const memKeys = mem.map(hitKey); const sqlKeys = sql.map(hitKey); const memRanks = new Map(memKeys.map((key, idx) => [key, idx + 1])); @@ -300,11 +326,39 @@ if (argv['write-report']) { } if (argv.enforce) { - const minOverlap = typeof argv['min-overlap'] === 'number' - ? argv['min-overlap'] - : (parseFloat(argv['min-overlap']) || 0.6); - if (summary.overlapAvg < minOverlap) { - console.error(`Overlap below threshold (${summary.overlapAvg.toFixed(3)} < ${minOverlap}).`); - process.exit(1); + const isFts = sqliteBackend === 'sqlite-fts'; + const defaults = isFts + ? { minOverlap: 0.7, minRankCorr: 0.55, maxDelta: 0.5, minSingleOverlap: 0.6 } + : { minOverlap: 0.95, minRankCorr: 0.9, maxDelta: 0.1, minSingleOverlap: 0.6 }; + const thresholds = { + minOverlap: Number.isFinite(argv['min-overlap']) ? argv['min-overlap'] : defaults.minOverlap, + minRankCorr: Number.isFinite(argv['min-rank-corr']) ? argv['min-rank-corr'] : defaults.minRankCorr, + maxDelta: Number.isFinite(argv['max-delta']) ? 
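+    // explicit CLI flags override the per-backend defaults only when they parse as finite numbers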
argv['max-delta'] : defaults.maxDelta, + minSingleOverlap: Number.isFinite(argv['min-overlap-single']) + ? argv['min-overlap-single'] + : defaults.minSingleOverlap + }; + const minOverlapSingle = overlapValues.length ? Math.min(...overlapValues) : 1; + const failures = []; + if (summary.overlapAvg < thresholds.minOverlap) { + failures.push(`overlapAvg ${summary.overlapAvg.toFixed(3)} < ${thresholds.minOverlap}`); + } + if (summary.rankCorrAvg !== null && summary.rankCorrAvg < thresholds.minRankCorr) { + failures.push(`rankCorrAvg ${summary.rankCorrAvg.toFixed(3)} < ${thresholds.minRankCorr}`); + } + if (summary.scoreDeltaAvg > thresholds.maxDelta) { + failures.push(`avgDelta ${summary.scoreDeltaAvg.toFixed(3)} > ${thresholds.maxDelta}`); + } + if (minOverlapSingle < thresholds.minSingleOverlap) { + failures.push(`minOverlap@K ${minOverlapSingle.toFixed(3)} < ${thresholds.minSingleOverlap}`); + } + if (failures.length) { + const label = failures.join('; '); + if (isFts && argv['enforce-fts'] !== true) { + console.warn(`SQLite FTS parity warning: ${label}`); + } else { + console.error(`Parity thresholds failed: ${label}`); + process.exit(1); + } } } diff --git a/tests/piece-assembly.js b/tests/piece-assembly.js new file mode 100644 index 000000000..c6dbc4f3e --- /dev/null +++ b/tests/piece-assembly.js @@ -0,0 +1,125 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; +import { loadChunkMeta, loadTokenPostings } from '../src/shared/artifact-io.js'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const buildIndexPath = path.join(root, 'build_index.js'); +const assemblePath = path.join(root, 'tools', 'assemble-pieces.js'); + +if (!fs.existsSync(fixtureRoot)) { + console.error(`Missing fixture: ${fixtureRoot}`); + process.exit(1); +} + +const cacheRoot = path.join(root, 'tests', '.cache', 'piece-assembly'); +const cacheA = path.join(cacheRoot, 'a'); +const cacheB = path.join(cacheRoot, 'b'); +const outputDir = path.join(cacheRoot, 'assembled', 'index-code'); + +await fsPromises.rm(cacheRoot, { recursive: true, force: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const baseEnv = { + ...process.env, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const run = (label, args, env) => { + const result = spawnSync(process.execPath, args, { + cwd: fixtureRoot, + env, + stdio: 'inherit' + }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 
1); + } +}; + +run('build_index (A)', [buildIndexPath, '--stub-embeddings', '--mode', 'code', '--repo', fixtureRoot], { + ...baseEnv, + PAIROFCLEATS_CACHE_ROOT: cacheA +}); +run('build_index (B)', [buildIndexPath, '--stub-embeddings', '--mode', 'code', '--repo', fixtureRoot], { + ...baseEnv, + PAIROFCLEATS_CACHE_ROOT: cacheB +}); + +const userConfig = loadUserConfig(fixtureRoot); +process.env.PAIROFCLEATS_CACHE_ROOT = cacheA; +const indexA = getIndexDir(fixtureRoot, 'code', userConfig); +process.env.PAIROFCLEATS_CACHE_ROOT = cacheB; +const indexB = getIndexDir(fixtureRoot, 'code', userConfig); + +run('assemble-pieces', [ + assemblePath, + '--repo', + fixtureRoot, + '--mode', + 'code', + '--out', + outputDir, + '--input', + indexA, + '--input', + indexB, + '--force' +], { + ...baseEnv, + PAIROFCLEATS_CACHE_ROOT: cacheRoot +}); + +const chunksA = loadChunkMeta(indexA).length; +const chunksB = loadChunkMeta(indexB).length; +const chunksOut = loadChunkMeta(outputDir).length; +if (chunksOut !== chunksA + chunksB) { + console.error(`Expected merged chunk count ${chunksA + chunksB}, got ${chunksOut}`); + process.exit(1); +} + +const tokenIndex = loadTokenPostings(outputDir); +if (!Array.isArray(tokenIndex?.docLengths) || tokenIndex.docLengths.length !== chunksOut) { + console.error('Merged token_postings docLengths mismatch.'); + process.exit(1); +} +if (!Array.isArray(tokenIndex?.vocab) || !Array.isArray(tokenIndex?.postings)) { + console.error('Merged token_postings missing vocab/postings.'); + process.exit(1); +} +if (tokenIndex.vocab.length !== tokenIndex.postings.length) { + console.error('Merged token_postings vocab/postings length mismatch.'); + process.exit(1); +} +let minDocId = Number.POSITIVE_INFINITY; +let maxDocId = -1; +for (const posting of tokenIndex.postings) { + if (!Array.isArray(posting)) continue; + for (const entry of posting) { + if (!Array.isArray(entry)) continue; + const docId = entry[0]; + if (!Number.isFinite(docId)) continue; + if (docId < minDocId) minDocId = docId; + if (docId > maxDocId) maxDocId = docId; + } +} +if (maxDocId < chunksA || maxDocId >= chunksOut) { + console.error('Merged token_postings docIds not offset correctly.'); + process.exit(1); +} +if (minDocId < 0) { + console.error('Merged token_postings docIds should be non-negative.'); + process.exit(1); +} + +const manifestPath = path.join(outputDir, 'pieces', 'manifest.json'); +if (!fs.existsSync(manifestPath)) { + console.error(`Missing pieces manifest: ${manifestPath}`); + process.exit(1); +} + +console.log('Piece assembly test passed'); diff --git a/tests/postings-quantize.js b/tests/postings-quantize.js new file mode 100644 index 000000000..68e96353e --- /dev/null +++ b/tests/postings-quantize.js @@ -0,0 +1,57 @@ +#!/usr/bin/env node +import { buildPostings } from '../src/index/build/postings.js'; +import { quantizeVec } from '../src/index/embedding.js'; + +const chunks = [ + { + tokens: ['a'], + embedding: [0.1, 0.2], + embed_doc: [0.3, 0.4], + embed_code: [0.5, 0.6], + minhashSig: [1, 2] + }, + { + tokens: ['b'], + embedding: [0.7, 0.8], + minhashSig: [3, 4] + } +]; + +const tokenPostings = new Map([ + ['a', [[0, 1]]], + ['b', [[1, 1]]] +]); + +const postings = await buildPostings({ + chunks, + df: new Map(), + tokenPostings, + docLengths: [1, 1], + fieldPostings: null, + fieldDocLengths: null, + phrasePost: new Map(), + triPost: new Map(), + postingsConfig: {}, + modelId: 'test', + useStubEmbeddings: true, + log: () => {}, + workerPool: null, + embeddingsEnabled: true +}); + +const 
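+// expected vectors per mode: doc and code fall back to the merged embedding when a chunk omits them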
expectedMerged = chunks.map((chunk) => quantizeVec(chunk.embedding)); +const expectedDoc = chunks.map((chunk) => quantizeVec(chunk.embed_doc || chunk.embedding)); +const expectedCode = chunks.map((chunk) => quantizeVec(chunk.embed_code || chunk.embedding)); + +const equal = (label, actual, expected) => { + if (JSON.stringify(actual) !== JSON.stringify(expected)) { + console.error(`postings quantize test failed: ${label}`); + process.exit(1); + } +}; + +equal('dense', postings.quantizedVectors, expectedMerged); +equal('doc', postings.quantizedDocVectors, expectedDoc); +equal('code', postings.quantizedCodeVectors, expectedCode); + +console.log('postings quantize test passed'); diff --git a/tests/preprocess-files.js b/tests/preprocess-files.js new file mode 100644 index 000000000..7a0dce5fc --- /dev/null +++ b/tests/preprocess-files.js @@ -0,0 +1,62 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { preprocessFiles } from '../src/index/build/preprocess.js'; +import { buildIgnoreMatcher } from '../src/index/build/ignore.js'; + +const root = process.cwd(); +const cacheRoot = path.join(root, 'tests', '.cache', 'preprocess'); +await fs.rm(cacheRoot, { recursive: true, force: true }); +await fs.mkdir(path.join(cacheRoot, 'src'), { recursive: true }); +await fs.mkdir(path.join(cacheRoot, 'docs'), { recursive: true }); + +await fs.writeFile(path.join(cacheRoot, 'src', 'app.js'), 'const a = 1;\nconst b = 2;\n'); +await fs.writeFile(path.join(cacheRoot, 'src', 'app.min.js'), 'var x=1;'); +await fs.writeFile( + path.join(cacheRoot, 'src', 'minified.js'), + 'const x=' + 'a'.repeat(200) +); +await fs.copyFile( + path.join(root, 'tests', 'fixtures', 'binary', 'sample.png'), + path.join(cacheRoot, 'src', 'binary.png') +); +await fs.writeFile(path.join(cacheRoot, 'docs', 'readme.md'), '# title\n'); + +const { ignoreMatcher } = await buildIgnoreMatcher({ root: cacheRoot, userConfig: {} }); +const fileScan = { + sampleBytes: 256, + minified: { + sampleMinBytes: 1, + minChars: 20, + avgLineThreshold: 10, + maxLineThreshold: 10, + maxWhitespaceRatio: 0.2 + }, + binary: { + sampleMinBytes: 1, + maxNonTextRatio: 0.1 + } +}; + +const result = await preprocessFiles({ + root: cacheRoot, + modes: ['code', 'prose'], + ignoreMatcher, + maxFileBytes: null, + fileCaps: {}, + fileScan, + lineCounts: true, + concurrency: 4 +}); + +const codeEntries = result.entriesByMode.code.map((entry) => entry.rel).sort(); +const proseEntries = result.entriesByMode.prose.map((entry) => entry.rel).sort(); +assert.deepEqual(codeEntries, ['src/app.js']); +assert.deepEqual(proseEntries, ['docs/readme.md']); +const codeSkips = result.skippedByMode.code.map((skip) => skip.reason); +assert.ok(codeSkips.includes('minified')); +assert.ok(codeSkips.includes('binary')); +assert.ok(result.lineCountsByMode.code.get('src/app.js') > 0); + +console.log('preprocess-files test passed.'); diff --git a/tests/profile-config.js b/tests/profile-config.js new file mode 100644 index 000000000..7094566e2 --- /dev/null +++ b/tests/profile-config.js @@ -0,0 +1,36 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fsPromises from 'node:fs/promises'; +import os from 'node:os'; +import path from 'node:path'; +import { loadUserConfig } from '../tools/dict-utils.js'; + +const tempRoot = await fsPromises.mkdtemp(path.join(os.tmpdir(), 'poc-profile-')); +const configPath = path.join(tempRoot, '.pairofcleats.json'); + +try { + await fsPromises.writeFile( + 
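+    // minimal config on purpose: only the profile selector, so the remaining settings come from profile defaults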
configPath, + JSON.stringify({ profile: 'lite' }, null, 2), + 'utf8' + ); + + const loaded = loadUserConfig(tempRoot); + assert.equal(loaded.profile, 'lite'); + assert.equal(loaded.indexing?.gitBlame, false); + + const previousProfile = process.env.PAIROFCLEATS_PROFILE; + process.env.PAIROFCLEATS_PROFILE = 'full'; + const loadedEnv = loadUserConfig(tempRoot); + assert.equal(loadedEnv.profile, 'full'); + assert.equal(loadedEnv.indexing?.gitBlame, true); + if (previousProfile) { + process.env.PAIROFCLEATS_PROFILE = previousProfile; + } else { + delete process.env.PAIROFCLEATS_PROFILE; + } +} finally { + await fsPromises.rm(tempRoot, { recursive: true, force: true }); +} + +console.log('profile-config test passed'); diff --git a/tests/prose-skip-imports.js b/tests/prose-skip-imports.js new file mode 100644 index 000000000..f0819b7b5 --- /dev/null +++ b/tests/prose-skip-imports.js @@ -0,0 +1,37 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const cacheRoot = path.join(root, 'tests', '.cache', 'prose-skip-imports'); + +await fsPromises.rm(cacheRoot, { recursive: true, force: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const result = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--mode', 'prose', '--repo', fixtureRoot], + { cwd: fixtureRoot, env, encoding: 'utf8' } +); + +if (result.status !== 0) { + console.error('Failed: build_index prose mode'); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 
1); +} + +const stderr = result.stderr || ''; +if (stderr.includes('Scanning for imports')) { + console.error('Prose mode should skip import scanning, but imports log was present.'); + process.exit(1); +} + +console.log('Prose import scan skip test passed'); diff --git a/tests/python-ast-worker.js b/tests/python-ast-worker.js new file mode 100644 index 000000000..c5946cfb0 --- /dev/null +++ b/tests/python-ast-worker.js @@ -0,0 +1,41 @@ +#!/usr/bin/env node +import { spawnSync } from 'node:child_process'; +import { getPythonAst, shutdownPythonAstPool } from '../src/lang/python.js'; + +function hasPython() { + const candidates = ['python', 'python3']; + for (const cmd of candidates) { + const result = spawnSync(cmd, ['-c', 'import sys; sys.stdout.write("ok")'], { encoding: 'utf8' }); + if (result.status === 0 && result.stdout.trim() === 'ok') return true; + } + return false; +} + +if (!hasPython()) { + console.log('Python AST worker test skipped (python not available).'); + process.exit(0); +} + +const sample = ` +def add(a: int, b: int) -> int: + return a + b +`; + +const ast = await getPythonAst(sample, null, { + dataflow: true, + controlFlow: true, + pythonAst: { workerCount: 1, maxWorkers: 1, taskTimeoutMs: 5000 } +}); + +if (!ast || !Array.isArray(ast.defs)) { + console.error('Python AST worker returned no defs.'); + process.exit(1); +} +const hasAdd = ast.defs.some((entry) => entry?.name === 'add'); +if (!hasAdd) { + console.error('Python AST worker missing add() definition.'); + process.exit(1); +} + +console.log('Python AST worker test passed'); +shutdownPythonAstPool(); diff --git a/tests/python-fallback.js b/tests/python-fallback.js index dfd24d77c..b1ad6bf7f 100644 --- a/tests/python-fallback.js +++ b/tests/python-fallback.js @@ -17,9 +17,10 @@ const chunks = buildPythonHeuristicChunks(text) || []; const hasPoint = chunks.some((chunk) => chunk.name === 'Point'); const hasDistance = chunks.some((chunk) => chunk.name === 'Point.distance'); const hasOuter = chunks.some((chunk) => chunk.name === 'outer'); +const hasFetch = chunks.some((chunk) => chunk.name === 'fetch_data'); -if (!hasPoint || !hasDistance || !hasOuter) { - console.error('Python heuristic fallback missing expected chunks (Point, Point.distance, outer).'); +if (!hasPoint || !hasDistance || !hasOuter || !hasFetch) { + console.error('Python heuristic fallback missing expected chunks (Point, Point.distance, outer, fetch_data).'); process.exit(1); } diff --git a/tests/query-intent.js b/tests/query-intent.js new file mode 100644 index 000000000..cbd21d2a1 --- /dev/null +++ b/tests/query-intent.js @@ -0,0 +1,40 @@ +#!/usr/bin/env node +import { classifyQuery, resolveIntentFieldWeights, resolveIntentVectorMode } from '../src/retrieval/query-intent.js'; + +const cases = [ + { query: 'src/utils/file.ts', tokens: ['src/utils/file.ts'], phrases: [], expect: 'path' }, + { query: 'renderToString', tokens: ['renderToString'], phrases: [], expect: 'code' }, + { query: 'how to configure proxy headers', tokens: ['how', 'to', 'configure', 'proxy', 'headers'], phrases: [], expect: 'prose' }, + { query: 'parse json', tokens: ['parse', 'json'], phrases: ['parse json'], expect: 'mixed' } +]; + +for (const sample of cases) { + const info = classifyQuery({ + query: sample.query, + tokens: sample.tokens, + phrases: sample.phrases + }); + if (info.type !== sample.expect) { + console.error(`Expected intent ${sample.expect} for "${sample.query}", got ${info.type}`); + process.exit(1); + } +} + +const proseIntent = classifyQuery({ + query: 'how to 
configure proxy headers', + tokens: ['how', 'to', 'configure', 'proxy', 'headers'], + phrases: [] +}); +const weights = resolveIntentFieldWeights(null, proseIntent); +if (!weights || !(weights.doc > weights.name)) { + console.error('Expected prose intent to emphasize doc weights.'); + process.exit(1); +} + +const vectorMode = resolveIntentVectorMode('auto', proseIntent); +if (vectorMode !== 'doc') { + console.error(`Expected auto vector mode to resolve to doc for prose, got ${vectorMode}`); + process.exit(1); +} + +console.log('query intent test passed'); diff --git a/tests/read-failure-skip.js b/tests/read-failure-skip.js new file mode 100644 index 000000000..94612ac64 --- /dev/null +++ b/tests/read-failure-skip.js @@ -0,0 +1,81 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { createFileProcessor } from '../src/index/build/file-processor.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'read-failure-skip'); +const repoRoot = path.join(tempRoot, 'repo'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); + +const targetPath = path.join(repoRoot, 'missing.js'); +await fsPromises.writeFile(targetPath, 'console.log("hello");\n'); +const stat = await fsPromises.stat(targetPath); +await fsPromises.unlink(targetPath); + +const skippedFiles = []; +const { processFile } = createFileProcessor({ + root: repoRoot, + mode: 'code', + dictConfig: {}, + dictWords: new Set(), + languageOptions: { astDataflowEnabled: false, controlFlowEnabled: false }, + postingsConfig: {}, + segmentsConfig: {}, + commentsConfig: {}, + allImports: {}, + contextWin: 0, + incrementalState: { + enabled: false, + manifest: { files: {} }, + bundleDir: '', + bundleFormat: 'json' + }, + getChunkEmbedding: async () => null, + getChunkEmbeddings: async () => null, + typeInferenceEnabled: false, + riskAnalysisEnabled: false, + riskConfig: {}, + relationsEnabled: false, + seenFiles: new Set(), + gitBlameEnabled: false, + lintEnabled: false, + complexityEnabled: false, + structuralMatches: null, + cacheConfig: {}, + cacheReporter: null, + queues: null, + workerPool: null, + crashLogger: null, + skippedFiles, + embeddingEnabled: false, + toolInfo: null, + tokenizationStats: null +}); + +const fileEntry = { + abs: targetPath, + rel: 'missing.js', + stat, + lines: 1, + scan: { checkedBinary: true, checkedMinified: true } +}; + +const result = await processFile(fileEntry, 0); +if (result !== null) { + console.error('Expected null result for read failure.'); + process.exit(1); +} +const skip = skippedFiles.find((entry) => entry?.file === targetPath && entry?.reason === 'read-failure'); +if (!skip) { + console.error('Expected read-failure skip entry.'); + process.exit(1); +} +if (!skip.code && !skip.message) { + console.error('Expected read-failure to include error details.'); + process.exit(1); +} + +console.log('read-failure skip test passed'); diff --git a/tests/retrieval-backend-policy.js b/tests/retrieval-backend-policy.js new file mode 100644 index 000000000..73c25b936 --- /dev/null +++ b/tests/retrieval-backend-policy.js @@ -0,0 +1,65 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { resolveBackendSelection } from '../src/retrieval/cli/policy.js'; + +const base = { + sqliteScoreModeConfig: false, + sqliteConfigured: true, + sqliteAvailable: true, + sqliteCodeAvailable: true, + sqliteProseAvailable: true, + sqliteCodePath: 'code.db', + 
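+  // placeholder artifact paths; the forced-backend assertions below check that these strings surface in error messages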
sqliteProsePath: 'prose.db', + lmdbConfigured: true, + lmdbAvailable: true, + lmdbCodeAvailable: true, + lmdbProseAvailable: true, + lmdbCodePath: 'lmdb-code', + lmdbProsePath: 'lmdb-prose', + sqliteAutoChunkThreshold: 0, + sqliteAutoArtifactBytes: 0, + needsSqlite: true, + needsCode: true, + needsProse: false, + root: process.cwd(), + userConfig: {} +}; + +const autoResult = await resolveBackendSelection({ + ...base, + backendArg: '' +}); +assert.equal(autoResult.useSqlite, true, 'expected auto backend to select sqlite'); +assert.equal(autoResult.useLmdb, false, 'expected auto backend to avoid lmdb'); + +const lmdbFallback = await resolveBackendSelection({ + ...base, + backendArg: '', + sqliteAvailable: false, + sqliteCodeAvailable: false, + lmdbAvailable: true +}); +assert.equal(lmdbFallback.useSqlite, false, 'expected sqlite to be skipped when unavailable'); +assert.equal(lmdbFallback.useLmdb, true, 'expected lmdb to be selected when available'); + +const forcedSqlite = await resolveBackendSelection({ + ...base, + backendArg: 'sqlite', + sqliteAvailable: false, + sqliteCodeAvailable: false +}); +assert.ok(forcedSqlite.error, 'expected sqlite error when forced and missing'); +assert.ok(forcedSqlite.error.message.includes('SQLite backend requested'), 'expected sqlite error message'); +assert.ok(forcedSqlite.error.message.includes('code=code.db'), 'expected sqlite missing path in message'); + +const forcedLmdb = await resolveBackendSelection({ + ...base, + backendArg: 'lmdb', + lmdbAvailable: false, + lmdbCodeAvailable: false +}); +assert.ok(forcedLmdb.error, 'expected lmdb error when forced and missing'); +assert.ok(forcedLmdb.error.message.includes('LMDB backend requested'), 'expected lmdb error message'); +assert.ok(forcedLmdb.error.message.includes('code=lmdb-code'), 'expected lmdb missing path in message'); + +console.log('retrieval backend policy test passed'); diff --git a/tests/retrieval-branch-filter.js b/tests/retrieval-branch-filter.js new file mode 100644 index 000000000..1ed03ea13 --- /dev/null +++ b/tests/retrieval-branch-filter.js @@ -0,0 +1,32 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { applyBranchFilter } from '../src/retrieval/cli/branch-filter.js'; + +let recorded = null; +const backendPolicy = { reason: 'auto', backendLabel: 'sqlite' }; +const result = await applyBranchFilter({ + branchFilter: 'main', + caseSensitive: false, + repoBranch: 'dev', + backendLabel: 'sqlite', + backendPolicy, + emitOutput: false, + jsonOutput: true, + recordSearchMetrics: (status) => { + recorded = status; + } +}); + +assert.equal(result.matched, false, 'expected branch mismatch to be reported'); +assert.equal(recorded, 'ok', 'expected search metrics to be recorded'); +assert.ok(result.payload, 'expected payload for branch mismatch'); +assert.equal(result.payload.backend, 'sqlite'); +assert.deepEqual(result.payload.prose, []); +assert.deepEqual(result.payload.code, []); +assert.deepEqual(result.payload.records, []); +assert.equal(result.payload.stats.branch, 'dev'); +assert.equal(result.payload.stats.branchFilter, 'main'); +assert.equal(result.payload.stats.branchMatch, false); +assert.deepEqual(result.payload.stats.backendPolicy, backendPolicy); + +console.log('retrieval branch filter test passed'); diff --git a/tests/safe-regex-engine.js b/tests/safe-regex-engine.js new file mode 100644 index 000000000..01873afc3 --- /dev/null +++ b/tests/safe-regex-engine.js @@ -0,0 +1,88 @@ +import assert from 'node:assert/strict'; +import { createSafeRegex, 
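+// exercises the RE2JS fallback, plus native RE2 when the binding is available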
isNativeRe2Available } from '../src/shared/safe-regex.js'; + +const basic = createSafeRegex('foo(\\d+)', '', { engine: 're2js' }); +assert.ok(basic, 'expected basic safe regex to compile'); +const match = basic.exec('xxfoo123yy'); +assert.ok(match, 'expected match'); +assert.equal(match[0], 'foo123'); +assert.equal(match[1], '123'); +assert.equal(match.index, 2); +assert.equal(match.input, 'xxfoo123yy'); + +const g = createSafeRegex('a', 'g', { engine: 're2js' }); +assert.ok(g, 'expected global regex to compile'); +const m1 = g.exec('a a'); +assert.ok(m1); +assert.equal(m1.index, 0); +assert.equal(g.lastIndex, 1); + +const m2 = g.exec('a a'); +assert.ok(m2); +assert.equal(m2.index, 2); +assert.equal(g.lastIndex, 3); + +const m3 = g.exec('a a'); +assert.equal(m3, null); +assert.equal(g.lastIndex, 0, 'expected lastIndex reset after global miss'); + +const t = createSafeRegex('a', 'g', { engine: 're2js' }); +assert.ok(t); +assert.equal(t.test('a a'), true); +assert.equal(t.lastIndex, 1); +assert.equal(t.test('a a'), true); +assert.equal(t.lastIndex, 3); +assert.equal(t.test('a a'), false); +assert.equal(t.lastIndex, 0); + +const sticky = createSafeRegex('a', 'y', { engine: 're2js' }); +assert.ok(sticky); +sticky.lastIndex = 1; +const sm1 = sticky.exec('ba'); +assert.ok(sm1); +assert.equal(sm1.index, 1); +assert.equal(sticky.lastIndex, 2); +const sm2 = sticky.exec('ba'); +assert.equal(sm2, null); +assert.equal(sticky.lastIndex, 0, 'expected lastIndex reset after sticky miss'); + +const tooLongPattern = createSafeRegex('a'.repeat(20), '', { maxPatternLength: 5, engine: 're2js' }); +assert.equal(tooLongPattern, null, 'expected maxPatternLength to reject pattern'); + +const inputLimit = createSafeRegex('a', 'g', { maxInputLength: 2, engine: 're2js' }); +assert.ok(inputLimit); +assert.equal(inputLimit.exec('aaa'), null); +assert.equal(inputLimit.lastIndex, 0); + +const flagNorm = createSafeRegex('a', 'g', { flags: 'imzzz', engine: 're2js' }); +assert.ok(flagNorm); +assert.ok(flagNorm.flags.includes('i')); +assert.ok(flagNorm.flags.includes('m')); +assert.ok(!flagNorm.flags.includes('z')); + +const forcedRe2js = createSafeRegex('a', '', { engine: 're2js' }); +assert.ok(forcedRe2js); +assert.equal(forcedRe2js.engine, 're2js'); + +const auto = createSafeRegex('a', '', { engine: 'auto' }); +assert.ok(auto); +assert.ok(['re2', 're2js'].includes(auto.engine)); + +const nativeAvailable = isNativeRe2Available(); +let sawWarn = false; +const originalWarn = console.warn; +console.warn = () => { + sawWarn = true; +}; +const forcedRe2 = createSafeRegex('a', '', { engine: 're2' }); +console.warn = originalWarn; +assert.ok(forcedRe2); +if (nativeAvailable) { + assert.equal(forcedRe2.engine, 're2'); + assert.equal(sawWarn, false); +} else { + assert.equal(forcedRe2.engine, 're2js'); + assert.equal(sawWarn, true); +} + +console.log('safe regex engine test passed'); diff --git a/tests/scip-ingest.js b/tests/scip-ingest.js new file mode 100644 index 000000000..3c6bc5252 --- /dev/null +++ b/tests/scip-ingest.js @@ -0,0 +1,47 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'scip-ingest'); +const repoRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const inputPath = path.join(root, 'tests', 'fixtures', 'scip', 'index.json'); +const outPath = path.join(tempRoot, 
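+// the ingest tool writes JSONL occurrences to this path and a .meta.json sidecar with counts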
'scip.jsonl'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); + +const result = spawnSync( + process.execPath, + [path.join(root, 'tools', 'scip-ingest.js'), '--repo', repoRoot, '--input', inputPath, '--out', outPath, '--json'], + { encoding: 'utf8' } +); +if (result.status !== 0) { + console.error(result.stderr || result.stdout || 'scip-ingest failed'); + process.exit(result.status ?? 1); +} + +if (!fs.existsSync(outPath)) { + console.error('scip output not found'); + process.exit(1); +} + +const lines = fs.readFileSync(outPath, 'utf8').trim().split(/\r?\n/).filter(Boolean); +assert.ok(lines.length >= 2, 'expected scip output lines'); + +const first = JSON.parse(lines[0]); +assert.equal(first.file, 'src/example.js'); +assert.equal(first.name, 'doThing'); +assert.equal(first.role, 'definition'); +assert.equal(first.startLine, 2); + +const metaPath = `${outPath}.meta.json`; +const meta = JSON.parse(fs.readFileSync(metaPath, 'utf8')); +assert.equal(meta.stats.occurrences, lines.length); +assert.equal(meta.stats.definitions, 1); +assert.equal(meta.stats.references, 1); + +console.log('scip ingest test passed'); diff --git a/tests/script-coverage-harness.js b/tests/script-coverage-harness.js new file mode 100644 index 000000000..d86910c2b --- /dev/null +++ b/tests/script-coverage-harness.js @@ -0,0 +1,22 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { applyActionCoverage, createCoverageState, finalizeCoverage, reportCoverage } from './script-coverage/report.js'; + +const unknownState = createCoverageState({ scriptNames: ['build-index'] }); +applyActionCoverage(unknownState, { label: 'unknown', covers: ['missing-script'] }); +const unknownSummary = finalizeCoverage(unknownState); +assert.deepEqual(unknownSummary.unknownCovers, ['missing-script']); +assert.equal(reportCoverage(unknownSummary), false, 'expected unknown covers to fail report'); + +const tierMissingState = createCoverageState({ scriptNames: ['build-index'] }); +applyActionCoverage(tierMissingState, { label: 'tier-missing', covers: ['build-index'] }); +const tierMissingSummary = finalizeCoverage(tierMissingState); +assert.equal(tierMissingSummary.missingTierB.length, 1, 'expected tier B to remain missing without override'); + +const tierOverrideState = createCoverageState({ scriptNames: ['build-index'] }); +applyActionCoverage(tierOverrideState, { label: 'tier-override', coversTierB: ['build-index'] }); +const tierOverrideSummary = finalizeCoverage(tierOverrideState); +assert.equal(tierOverrideSummary.missingTierB.length, 0, 'expected tier B override to satisfy coverage'); +assert.equal(tierOverrideSummary.coveredTierB.length, 1, 'expected tier B override to mark covered'); + +console.log('script coverage harness test passed'); diff --git a/tests/script-coverage.js b/tests/script-coverage.js index 20b94417e..46ab0cda1 100644 --- a/tests/script-coverage.js +++ b/tests/script-coverage.js @@ -1,410 +1,69 @@ #!/usr/bin/env node -import fs from 'node:fs'; -import fsPromises from 'node:fs/promises'; -import path from 'node:path'; -import { spawnSync } from 'node:child_process'; +import { createCli } from '../src/shared/cli.js'; +import { buildActions } from './script-coverage/actions.js'; +import { loadPackageScripts, resolveScriptCoveragePaths } from './script-coverage/paths.js'; +import { applyActionCoverage, applyDefaultSkips, createCoverageState, finalizeCoverage, reportCoverage } from './script-coverage/report.js'; +import { 
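+// orchestration only; the action list, coverage state, and runners moved into the tests/script-coverage/ modules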
createCommandRunner, prepareCoverageDirs, resolveRetries, runShellScripts } from './script-coverage/runner.js'; const root = process.cwd(); -const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8')); -const scripts = pkg.scripts || {}; +const argv = createCli({ + scriptName: 'script-coverage', + options: { + retries: { type: 'number', default: 2 }, + 'log-dir': { type: 'string', default: '' } + } +}).parse(); +const envRetries = Number.parseInt( + process.env.PAIROFCLEATS_TEST_RETRIES ?? process.env.npm_config_test_retries ?? '', + 10 +); +const retries = resolveRetries({ argvRetries: argv.retries, envRetries, defaultRetries: 2 }); +const logDirOverride = argv['log-dir'] + || process.env.PAIROFCLEATS_TEST_LOG_DIR + || process.env.npm_config_test_log_dir + || ''; + +const { + baseCacheRoot, + repoCacheRoot, + fixtureRoot, + failureLogRoot, + ciOutDir, + mergeDir +} = resolveScriptCoveragePaths({ root, logDirOverride }); + +const scripts = loadPackageScripts(root); const scriptNames = Object.keys(scripts); -const coverage = new Map(scriptNames.map((name) => [name, { status: 'pending', via: null, reason: null }])); +const coverageState = createCoverageState({ scriptNames }); -if (coverage.has('script-coverage-test')) { - coverage.set('script-coverage-test', { status: 'covered', via: 'self', reason: null }); -} -if (coverage.has('test-all')) { - markSkipped('test-all', 'aggregates script-coverage-test and bench'); -} - -const baseCacheRoot = path.join(root, 'tests', '.cache', 'script-coverage'); -const repoCacheRoot = path.join(baseCacheRoot, 'repo'); -const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); const repoEnv = { ...process.env, PAIROFCLEATS_CACHE_ROOT: repoCacheRoot, PAIROFCLEATS_EMBEDDINGS: 'stub' }; -await fsPromises.rm(baseCacheRoot, { recursive: true, force: true }); -await fsPromises.mkdir(repoCacheRoot, { recursive: true }); - -function markCovered(name, via) { - if (!coverage.has(name)) return; - const entry = coverage.get(name); - if (entry.status === 'pending') { - coverage.set(name, { status: 'covered', via, reason: null }); - } -} - -function markSkipped(name, reason) { - if (!coverage.has(name)) return; - coverage.set(name, { status: 'skipped', via: null, reason }); -} - -function run(label, cmd, args, options = {}) { - const result = spawnSync(cmd, args, { stdio: 'inherit', ...options }); - if (result.status !== 0) { - console.error(`Failed: ${label}`); - process.exit(result.status ?? 
1); - } -} - -function runNode(label, scriptPath, args = [], options = {}) { - run(label, process.execPath, [scriptPath, ...args], options); -} - -const ciOutDir = path.join(baseCacheRoot, 'ci-artifacts'); - -const actions = [ - { - label: 'download-dicts-test', - run: () => runNode('download-dicts-test', path.join(root, 'tests', 'download-dicts.js')), - covers: ['download-dicts', 'download-dicts-test'] - }, - { - label: 'download-extensions-test', - run: () => runNode('download-extensions-test', path.join(root, 'tests', 'download-extensions.js')), - covers: ['download-extensions', 'verify-extensions', 'download-extensions-test'] - }, - { - label: 'tooling-detect-test', - run: () => runNode('tooling-detect-test', path.join(root, 'tests', 'tooling-detect.js')), - covers: ['tooling-detect', 'tooling-detect-test'] - }, - { - label: 'tooling-install-test', - run: () => runNode('tooling-install-test', path.join(root, 'tests', 'tooling-install.js')), - covers: ['tooling-install', 'tooling-install-test'] - }, - { - label: 'clean-artifacts-test', - run: () => runNode('clean-artifacts-test', path.join(root, 'tests', 'clean-artifacts.js')), - covers: ['clean-artifacts', 'clean-artifacts-test'] - }, - { - label: 'uninstall-test', - run: () => runNode('uninstall-test', path.join(root, 'tests', 'uninstall.js')), - covers: ['uninstall', 'uninstall-test'] - }, - { - label: 'sqlite-incremental-test', - run: () => runNode('sqlite-incremental-test', path.join(root, 'tests', 'sqlite-incremental.js')), - covers: ['sqlite-incremental-test'] - }, - { - label: 'sqlite-compact-test', - run: () => runNode('sqlite-compact-test', path.join(root, 'tests', 'sqlite-compact.js')), - covers: ['sqlite-compact-test', 'compact-sqlite-index'] - }, - { - label: 'sqlite-ann-extension-test', - run: () => runNode('sqlite-ann-extension-test', path.join(root, 'tests', 'sqlite-ann-extension.js')), - covers: ['sqlite-ann-extension-test'] - }, - { - label: 'language-fidelity-test', - run: () => runNode('language-fidelity-test', path.join(root, 'tests', 'language-fidelity.js')), - covers: ['language-fidelity-test'] - }, - { - label: 'type-inference-crossfile-test', - run: () => runNode('type-inference-crossfile-test', path.join(root, 'tests', 'type-inference-crossfile.js')), - covers: ['type-inference-crossfile-test'] - }, - { - label: 'type-inference-crossfile-go', - run: () => runNode('type-inference-crossfile-go', path.join(root, 'tests', 'type-inference-crossfile-go.js')), - covers: [] - }, - { - label: 'format-fidelity-test', - run: () => runNode('format-fidelity-test', path.join(root, 'tests', 'format-fidelity.js')), - covers: ['format-fidelity-test'] - }, - { - label: 'compare-models-test', - run: () => runNode('compare-models-test', path.join(root, 'tests', 'compare-models.js')), - covers: ['compare-models-test', 'compare-models'] - }, - { - label: 'summary-report-test', - run: () => runNode('summary-report-test', path.join(root, 'tests', 'summary-report.js')), - covers: ['summary-report-test', 'summary-report'] - }, - { - label: 'docs-consistency-test', - run: () => runNode('docs-consistency-test', path.join(root, 'tests', 'docs-consistency.js')), - covers: ['docs-consistency-test'] - }, - { - label: 'repometrics-dashboard-test', - run: () => runNode('repometrics-dashboard-test', path.join(root, 'tests', 'repometrics-dashboard.js')), - covers: ['repometrics-dashboard-test', 'repometrics-dashboard'] - }, - { - label: 'triage-test', - run: () => runNode('triage-test', path.join(root, 'tests', 'triage-records.js')), - covers: 
['triage-test'] - }, - { - label: 'mcp-server-test', - run: () => runNode('mcp-server-test', path.join(root, 'tests', 'mcp-server.js')), - covers: ['mcp-server-test', 'mcp-server'] - }, - { - label: 'git-hooks-test', - run: () => runNode('git-hooks-test', path.join(root, 'tests', 'git-hooks.js')), - covers: ['git-hooks-test', 'git-hooks'] - }, - { - label: 'git-meta-test', - run: () => runNode('git-meta-test', path.join(root, 'tests', 'git-meta.js')), - covers: [] - }, - { - label: 'churn-filter-test', - run: () => runNode('churn-filter-test', path.join(root, 'tests', 'churn-filter.js')), - covers: [] - }, - { - label: 'search-filters-test', - run: () => runNode('search-filters-test', path.join(root, 'tests', 'search-filters.js')), - covers: ['search-filters-test'] - }, - { - label: 'unicode-offset-test', - run: () => runNode('unicode-offset-test', path.join(root, 'tests', 'unicode-offset.js')), - covers: [] - }, - { - label: 'repo-root-test', - run: () => runNode('repo-root-test', path.join(root, 'tests', 'repo-root.js')), - covers: [] - }, - { - label: 'file-size-guard-test', - run: () => runNode('file-size-guard-test', path.join(root, 'tests', 'file-size-guard.js')), - covers: [] - }, - { - label: 'ts-jsx-fixtures', - run: () => runNode('ts-jsx-fixtures', path.join(root, 'tests', 'ts-jsx-fixtures.js')), - covers: [] - }, - { - label: 'python-fallback-test', - run: () => runNode('python-fallback-test', path.join(root, 'tests', 'python-fallback.js')), - covers: [] - }, - { - label: 'verify', - run: () => runNode('verify', path.join(root, 'tests', 'smoke.js')), - covers: ['verify'] - }, - { - label: 'fixture-smoke', - run: () => runNode('fixture-smoke', path.join(root, 'tests', 'fixture-smoke.js')), - covers: ['fixture-smoke', 'build-index', 'build-sqlite-index', 'search'] - }, - { - label: 'fixture-empty', - run: () => runNode('fixture-empty', path.join(root, 'tests', 'fixture-empty.js')), - covers: [] - }, - { - label: 'fixture-parity', - run: () => runNode('fixture-parity', path.join(root, 'tests', 'fixture-parity.js')), - covers: ['fixture-parity'] - }, - { - label: 'fixture-eval', - run: () => runNode('fixture-eval', path.join(root, 'tests', 'fixture-eval.js')), - covers: ['fixture-eval'] - }, - { - label: 'query-cache-test', - run: () => runNode('query-cache-test', path.join(root, 'tests', 'query-cache.js')), - covers: ['query-cache-test'] - }, - { - label: 'repo-build-index', - run: () => runNode('build-index', path.join(root, 'build_index.js'), ['--stub-embeddings', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), - covers: ['build-index'] - }, - { - label: 'repo-build-sqlite-index', - run: () => runNode('build-sqlite-index', path.join(root, 'tools', 'build-sqlite-index.js'), ['--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), - covers: ['build-sqlite-index'] - }, - { - label: 'parity', - run: () => runNode( - 'parity', - path.join(root, 'tests', 'parity.js'), - ['--search', path.join(root, 'search.js'), '--no-ann'], - { cwd: fixtureRoot, env: repoEnv } - ), - covers: ['parity'] - }, - { - label: 'repo-search', - run: () => runNode('search', path.join(root, 'search.js'), ['message', '--json', '--no-ann', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), - covers: ['search'] - }, - { - label: 'search-sqlite', - run: () => runNode('search-sqlite', path.join(root, 'tools', 'search-sqlite.js'), ['message', '--json', '--no-ann', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), - covers: ['search-sqlite'] - }, - { - label: 'report-artifacts', 
- run: () => runNode('report-artifacts', path.join(root, 'tools', 'report-artifacts.js'), ['--json', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), - covers: ['report-artifacts'] - }, - { - label: 'cache-gc-test', - run: () => runNode('cache-gc-test', path.join(root, 'tests', 'cache-gc.js')), - covers: ['cache-gc', 'cache-gc-test'] - }, - { - label: 'generate-repo-dict', - run: () => runNode('generate-repo-dict', path.join(root, 'tools', 'generate-repo-dict.js'), ['--min-count', '1', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), - covers: ['generate-repo-dict'] - }, - { - label: 'ci-build', - run: () => runNode('ci-build', path.join(root, 'tools', 'ci-build-artifacts.js'), ['--out', ciOutDir, '--skip-build', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), - covers: ['ci-build'] - }, - { - label: 'ci-restore', - run: () => runNode('ci-restore', path.join(root, 'tools', 'ci-restore-artifacts.js'), ['--from', ciOutDir, '--force', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), - covers: ['ci-restore'] - }, - { - label: 'bootstrap', - run: () => runNode( - 'bootstrap', - path.join(root, 'tools', 'bootstrap.js'), - ['--skip-install', '--skip-dicts', '--skip-index', '--skip-artifacts', '--skip-tooling', '--repo', fixtureRoot], - { cwd: fixtureRoot, env: repoEnv } - ), - covers: ['bootstrap'] - }, - { - label: 'setup-test', - run: () => runNode('setup-test', path.join(root, 'tests', 'setup.js')), - covers: ['setup', 'setup-test'] - }, - { - label: 'config-validate-test', - run: () => runNode('config-validate-test', path.join(root, 'tests', 'config-validate.js')), - covers: ['config-validate', 'config-validate-test'] - }, - { - label: 'cli-test', - run: () => runNode('cli-test', path.join(root, 'tests', 'cli.js')), - covers: ['cli-test'] - } -]; - -const mergeDir = path.join(baseCacheRoot, 'merge'); -await fsPromises.mkdir(mergeDir, { recursive: true }); -const mergeBase = path.join(mergeDir, 'base.txt'); -const mergeTarget = path.join(mergeDir, 'target.txt'); -await fsPromises.writeFile(mergeBase, 'alpha\nbeta\n'); -await fsPromises.writeFile(mergeTarget, 'beta\ngamma\n'); - -actions.push({ - label: 'merge-history', - run: () => runNode('merge-history', path.join(root, 'tools', 'mergeSearchHistory.js'), [mergeBase, mergeTarget]), - covers: ['merge-history'] -}); -actions.push({ - label: 'merge-no-results', - run: () => runNode('merge-no-results', path.join(root, 'tools', 'mergeNoResultQueries.js'), [mergeBase, mergeTarget]), - covers: ['merge-no-results'] +await prepareCoverageDirs({ baseCacheRoot, repoCacheRoot, failureLogRoot }); +const { run, runNode } = createCommandRunner({ retries, failureLogRoot }); + +const actions = await buildActions({ + root, + fixtureRoot, + repoEnv, + baseCacheRoot, + ciOutDir, + mergeDir, + runNode }); for (const action of actions) { console.log(`[script-coverage] ${action.label}`); action.run(); - for (const name of action.covers) { - markCovered(name, action.label); - } -} - -markSkipped('download-models', 'requires network model download'); -markSkipped('bench', 'benchmarks are long-running'); -markSkipped('bench-ann', 'benchmarks are long-running'); -markSkipped('watch-index', 'watch mode runs until interrupted'); -markSkipped('format', 'modifies working tree'); -markSkipped('lint', 'requires npm install and project lint config'); - -const shellScripts = [ - path.join(root, 'merge-history.sh'), - path.join(root, 'merge-no-results.sh'), - path.join(root, 'merge-metrics.sh'), - path.join(root, 'tools', 
'merge-history.sh'), - path.join(root, 'tools', 'merge-no-results.sh'), - path.join(root, 'tools', 'merge-metrics.sh'), - path.join(root, 'tools', 'merge-agentinfo-notes.sh'), - path.join(root, 'tools', 'merge-agentinfo-index.sh') -]; - -const bashCheck = spawnSync('bash', ['-c', 'echo ok'], { encoding: 'utf8' }); -const bashAvailable = bashCheck.status === 0; -const jqCheck = bashAvailable ? spawnSync('bash', ['-c', 'command -v jq'], { encoding: 'utf8' }) : null; -const jqAvailable = jqCheck && jqCheck.status === 0; -const toPosixPath = (value) => (process.platform === 'win32' ? value.replace(/\\/g, '/') : value); -const bashPathCheck = bashAvailable - ? spawnSync('bash', ['-c', `cd "${toPosixPath(root)}"`], { encoding: 'utf8' }) - : null; -const bashAccessible = bashPathCheck && bashPathCheck.status === 0; - -if (bashAvailable && bashAccessible) { - const shellWorkDir = path.join(baseCacheRoot, 'shell'); - await fsPromises.mkdir(shellWorkDir, { recursive: true }); - const base = path.join(shellWorkDir, 'base.json'); - const ours = path.join(shellWorkDir, 'ours.json'); - const theirs = path.join(shellWorkDir, 'theirs.json'); - await fsPromises.writeFile(base, JSON.stringify({ file: { md: 1, code: 1 } }, null, 2)); - await fsPromises.writeFile(ours, JSON.stringify({ file: { md: 2, code: 0 } }, null, 2)); - await fsPromises.writeFile(theirs, JSON.stringify({ file: { md: 3, code: 2 } }, null, 2)); - - for (const scriptPath of shellScripts) { - if (!fs.existsSync(scriptPath)) continue; - if (scriptPath.endsWith('merge-metrics.sh') && !jqAvailable) { - console.log(`[skip] ${scriptPath} (jq not available)`); - continue; - } - const args = [scriptPath, base, ours, theirs].map(toPosixPath); - run('shell-script', 'bash', args, { cwd: root }); - } -} else if (!bashAvailable) { - console.log('[skip] shell scripts (bash not available)'); -} else { - console.log('[skip] shell scripts (bash cannot access workspace path)'); -} - -const missing = []; -const skipped = []; -const covered = []; -for (const [name, entry] of coverage.entries()) { - if (entry.status === 'pending') missing.push(name); - if (entry.status === 'skipped') skipped.push({ name, reason: entry.reason }); - if (entry.status === 'covered') covered.push({ name, via: entry.via }); + applyActionCoverage(coverageState, action); } -if (missing.length) { - console.error(`Missing coverage for: ${missing.join(', ')}`); - process.exit(1); -} +await runShellScripts({ root, baseCacheRoot, run }); -console.log(`script coverage: ${covered.length} covered, ${skipped.length} skipped`); -if (skipped.length) { - for (const entry of skipped) { - console.log(`- skipped ${entry.name}: ${entry.reason}`); - } -} +applyDefaultSkips(coverageState); +const summary = finalizeCoverage(coverageState); +const ok = reportCoverage(summary); +if (!ok) process.exit(1); diff --git a/tests/script-coverage/actions.js b/tests/script-coverage/actions.js new file mode 100644 index 000000000..aa92096d5 --- /dev/null +++ b/tests/script-coverage/actions.js @@ -0,0 +1,1096 @@ +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; + +export const buildActions = async (context) => { + const { root, fixtureRoot, repoEnv, baseCacheRoot, runNode } = context; + const ciOutDir = context.ciOutDir || path.join(baseCacheRoot, 'ci-artifacts'); + +const actions = [ + { + label: 'download-dicts-test', + run: () => runNode('download-dicts-test', path.join(root, 'tests', 'download-dicts.js')), + covers: ['download-dicts', 'download-dicts-test'] + }, + { + label: 
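+    // Each entry follows the same contract: { label, run, covers, coversTierB? }.
+    // `covers` lists the package.json script names the action marks as
+    // exercised (see report.js); `coversTierB` additionally satisfies the
+    // Tier B build-script requirement.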
'download-extensions-test', + run: () => runNode('download-extensions-test', path.join(root, 'tests', 'download-extensions.js')), + covers: ['download-extensions', 'verify-extensions', 'download-extensions-test'] + }, + { + label: 'vector-extension-sanitize-test', + run: () => runNode('vector-extension-sanitize-test', path.join(root, 'tests', 'vector-extension-sanitize.js')), + covers: ['vector-extension-sanitize-test'] + }, + { + label: 'tooling-detect-test', + run: () => runNode('tooling-detect-test', path.join(root, 'tests', 'tooling-detect.js')), + covers: ['tooling-detect', 'tooling-detect-test'] + }, + { + label: 'tooling-install-test', + run: () => runNode('tooling-install-test', path.join(root, 'tests', 'tooling-install.js')), + covers: ['tooling-install', 'tooling-install-test'] + }, + { + label: 'clean-artifacts-test', + run: () => runNode('clean-artifacts-test', path.join(root, 'tests', 'clean-artifacts.js')), + covers: ['clean-artifacts', 'clean-artifacts-test'] + }, + { + label: 'uninstall-test', + run: () => runNode('uninstall-test', path.join(root, 'tests', 'uninstall.js')), + covers: ['uninstall', 'uninstall-test'] + }, + { + label: 'sqlite-incremental-test', + run: () => runNode('sqlite-incremental-test', path.join(root, 'tests', 'sqlite-incremental.js')), + covers: ['sqlite-incremental-test'] + }, + { + label: 'sqlite-incremental-no-change-test', + run: () => runNode('sqlite-incremental-no-change-test', path.join(root, 'tests', 'sqlite-incremental-no-change.js')), + covers: ['sqlite-incremental-no-change-test'] + }, + { + label: 'sqlite-bundle-missing-test', + run: () => runNode('sqlite-bundle-missing-test', path.join(root, 'tests', 'sqlite-bundle-missing.js')), + covers: ['sqlite-bundle-missing-test'] + }, + { + label: 'sqlite-index-state-fail-closed-test', + run: () => runNode('sqlite-index-state-fail-closed-test', path.join(root, 'tests', 'sqlite-index-state-fail-closed.js')), + covers: ['sqlite-index-state-fail-closed-test'] + }, + { + label: 'artifact-size-guardrails-test', + run: () => runNode('artifact-size-guardrails-test', path.join(root, 'tests', 'artifact-size-guardrails.js')), + covers: ['artifact-size-guardrails-test'] + }, + { + label: 'chunk-meta-jsonl-cleanup-test', + run: () => runNode('chunk-meta-jsonl-cleanup-test', path.join(root, 'tests', 'chunk-meta-jsonl-cleanup.js')), + covers: ['chunk-meta-jsonl-cleanup-test'] + }, + { + label: 'safe-regex-engine-test', + run: () => runNode('safe-regex-engine-test', path.join(root, 'tests', 'safe-regex-engine.js')), + covers: ['safe-regex-engine-test'] + }, + + { + label: 'incremental-manifest-test', + run: () => runNode('incremental-manifest-test', path.join(root, 'tests', 'incremental-manifest.js')), + covers: ['incremental-manifest-test'] + }, + { + label: 'index-lock-test', + run: () => runNode('index-lock-test', path.join(root, 'tests', 'index-lock.js')), + covers: ['index-lock-test'] + }, + { + label: 'sqlite-compact-test', + run: () => runNode('sqlite-compact-test', path.join(root, 'tests', 'sqlite-compact.js')), + covers: ['sqlite-compact-test', 'compact-sqlite-index'], + coversTierB: ['compact-sqlite-index'] + }, + { + label: 'sqlite-sidecar-cleanup-test', + run: () => runNode('sqlite-sidecar-cleanup-test', path.join(root, 'tests', 'sqlite-sidecar-cleanup.js')), + covers: ['sqlite-sidecar-cleanup-test'] + }, + { + label: 'sqlite-ann-extension-test', + run: () => runNode('sqlite-ann-extension-test', path.join(root, 'tests', 'sqlite-ann-extension.js')), + covers: ['sqlite-ann-extension-test'] + }, + { + 
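+    // sqlite ANN lane: the extension test sits above; candidate-set and
+    // build-manifest/vocab/delete checks follow.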
label: 'sqlite-vec-candidate-set-test', + run: () => runNode('sqlite-vec-candidate-set-test', path.join(root, 'tests', 'sqlite-vec-candidate-set.js')), + covers: ['sqlite-vec-candidate-set-test'] + }, + { + label: 'sqlite-build-manifest-test', + run: () => runNode('sqlite-build-manifest-test', path.join(root, 'tests', 'sqlite-build-manifest.js')), + covers: ['sqlite-build-manifest-test'] + }, + { + label: 'sqlite-build-vocab-test', + run: () => runNode('sqlite-build-vocab-test', path.join(root, 'tests', 'sqlite-build-vocab.js')), + covers: ['sqlite-build-vocab-test'] + }, + { + label: 'sqlite-build-delete-test', + run: () => runNode('sqlite-build-delete-test', path.join(root, 'tests', 'sqlite-build-delete.js')), + covers: ['sqlite-build-delete-test'] + }, + { + label: 'hnsw-ann-test', + run: () => runNode('hnsw-ann-test', path.join(root, 'tests', 'hnsw-ann.js')), + covers: ['hnsw-ann-test'] + }, + { + label: 'hnsw-atomic-test', + run: () => runNode('hnsw-atomic-test', path.join(root, 'tests', 'hnsw-atomic.js')), + covers: ['hnsw-atomic-test'] + }, + { + label: 'minhash-parity-test', + run: () => runNode('minhash-parity-test', path.join(root, 'tests', 'minhash-parity.js')), + covers: ['minhash-parity-test'] + }, + { + label: 'language-fidelity-test', + run: () => runNode('language-fidelity-test', path.join(root, 'tests', 'language-fidelity.js')), + covers: ['language-fidelity-test'] + }, + { + label: 'metadata-v2-test', + run: () => runNode('metadata-v2-test', path.join(root, 'tests', 'metadata-v2.js')), + covers: ['metadata-v2-test'] + }, + { + label: 'chunking-limits-test', + run: () => runNode('chunking-limits-test', path.join(root, 'tests', 'chunking-limits.js')), + covers: ['chunking-limits-test'] + }, + { + label: 'graph-chunk-id-test', + run: () => runNode('graph-chunk-id-test', path.join(root, 'tests', 'graph-chunk-id.js')), + covers: ['graph-chunk-id-test'] + }, + { + label: 'sqlite-chunk-id-test', + run: () => runNode('sqlite-chunk-id-test', path.join(root, 'tests', 'sqlite-chunk-id.js')), + covers: ['sqlite-chunk-id-test'] + }, + { + label: 'kotlin-perf-guard-test', + run: () => runNode('kotlin-perf-guard-test', path.join(root, 'tests', 'kotlin-perf-guard.js')), + covers: ['kotlin-perf-guard-test'] + }, + { + label: 'tree-sitter-chunks-test', + run: () => runNode('tree-sitter-chunks-test', path.join(root, 'tests', 'tree-sitter-chunks.js')), + covers: ['tree-sitter-chunks-test'] + }, + { + label: 'type-inference-crossfile-go', + run: () => runNode('type-inference-crossfile-go', path.join(root, 'tests', 'type-inference-crossfile-go.js')), + covers: [] + }, + { + label: 'type-inference-crossfile-test', + run: () => runNode('type-inference-crossfile-test', path.join(root, 'tests', 'type-inference-crossfile.js')), + covers: ['type-inference-crossfile-test'] + }, + { + label: 'type-inference-lsp-enrichment-test', + run: () => runNode('type-inference-lsp-enrichment-test', path.join(root, 'tests', 'type-inference-lsp-enrichment.js')), + covers: ['type-inference-lsp-enrichment-test'] + }, + { + label: 'type-inference-typescript-provider-no-ts', + run: () => runNode('type-inference-typescript-provider-no-ts', path.join(root, 'tests', 'type-inference-typescript-provider-no-ts.js')), + covers: [] + }, + { + label: 'type-inference-clangd-provider-no-clangd', + run: () => runNode('type-inference-clangd-provider-no-clangd', path.join(root, 'tests', 'type-inference-clangd-provider-no-clangd.js')), + covers: [] + }, + { + label: 'type-inference-sourcekit-provider-no-sourcekit', + run: () => 
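+      // As with the no-ts and no-clangd lanes above, this presumably asserts
+      // a graceful fallback when the sourcekit binary is absent.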
runNode('type-inference-sourcekit-provider-no-sourcekit', path.join(root, 'tests', 'type-inference-sourcekit-provider-no-sourcekit.js')), + covers: [] + }, + { + label: 'format-fidelity-test', + run: () => runNode('format-fidelity-test', path.join(root, 'tests', 'format-fidelity.js')), + covers: ['format-fidelity-test'] + }, + { + label: 'chunking-yaml-test', + run: () => runNode('chunking-yaml-test', path.join(root, 'tests', 'chunking-yaml.js')), + covers: [] + }, + { + label: 'chunking-sql-lua-test', + run: () => runNode('chunking-sql-lua-test', path.join(root, 'tests', 'chunking-sql-lua.js')), + covers: [] + }, + { + label: 'segment-pipeline-test', + run: () => runNode('segment-pipeline-test', path.join(root, 'tests', 'segment-pipeline.js')), + covers: [] + }, + { + label: 'prose-skip-imports-test', + run: () => runNode('prose-skip-imports-test', path.join(root, 'tests', 'prose-skip-imports.js')), + covers: ['prose-skip-imports-test'] + }, + { + label: 'extracted-prose-test', + run: () => runNode('extracted-prose-test', path.join(root, 'tests', 'extracted-prose.js')), + covers: [] + }, + { + label: 'tokenize-dictionary-test', + run: () => runNode('tokenize-dictionary-test', path.join(root, 'tests', 'tokenize-dictionary.js')), + covers: [] + }, + { + label: 'import-links-test', + run: () => runNode('import-links-test', path.join(root, 'tests', 'import-links.js')), + covers: ['import-links-test'] + }, + { + label: 'git-blame-range-test', + run: () => runNode('git-blame-range-test', path.join(root, 'tests', 'git-blame-range.js')), + covers: ['git-blame-range-test'] + }, + { + label: 'external-docs-test', + run: () => runNode('external-docs-test', path.join(root, 'tests', 'external-docs.js')), + covers: ['external-docs-test'] + }, + { + label: 'tooling-lsp-test', + run: () => runNode('tooling-lsp-test', path.join(root, 'tests', 'tooling-lsp.js')), + covers: [] + }, + { + label: 'lsp-shutdown-test', + run: () => runNode('lsp-shutdown-test', path.join(root, 'tests', 'lsp-shutdown.js')), + covers: ['lsp-shutdown-test'] + }, + { + label: 'bench-language-repos-test', + run: () => runNode('bench-language-repos-test', path.join(root, 'tests', 'bench-language-repos.js')), + covers: ['bench-language-test'] + }, + { + label: 'bench-language-lock-test', + run: () => runNode('bench-language-lock-test', path.join(root, 'tests', 'bench-language-lock.js')), + covers: ['bench-language-lock-test'] + }, + { + label: 'bench-language-progress-parse-test', + run: () => runNode('bench-language-progress-parse-test', path.join(root, 'tests', 'bench-language-progress-parse.js')), + covers: ['bench-language-progress-parse-test'] + }, + { + label: 'bench-language-lock-semantics-test', + run: () => runNode('bench-language-lock-semantics-test', path.join(root, 'tests', 'bench-language-lock-semantics.js')), + covers: ['bench-language-lock-semantics-test'] + }, + { + label: 'retrieval-branch-filter-test', + run: () => runNode('retrieval-branch-filter-test', path.join(root, 'tests', 'retrieval-branch-filter.js')), + covers: ['retrieval-branch-filter-test'] + }, + { + label: 'retrieval-backend-policy-test', + run: () => runNode('retrieval-backend-policy-test', path.join(root, 'tests', 'retrieval-backend-policy.js')), + covers: ['retrieval-backend-policy-test'] + }, + { + label: 'summary-report-test', + run: () => runNode('summary-report-test', path.join(root, 'tests', 'summary-report.js')), + covers: ['summary-report-test', 'summary-report'] + }, + { + label: 'repometrics-dashboard-test', + run: () => 
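+      // Lanes whose covers list both 'x-test' and 'x' (summary-report above,
+      // this one) mark the npm test lane and the tool it drives in one run.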
runNode('repometrics-dashboard-test', path.join(root, 'tests', 'repometrics-dashboard.js')), + covers: ['repometrics-dashboard-test', 'repometrics-dashboard'] + }, + { + label: 'index-validate-test', + run: () => runNode('index-validate-test', path.join(root, 'tests', 'index-validate.js')), + covers: ['index-validate-test', 'index-validate'] + }, + { + label: 'embeddings-validate-test', + run: () => runNode('embeddings-validate-test', path.join(root, 'tests', 'embeddings-validate.js')), + covers: ['embeddings-validate-test'] + }, + { + label: 'triage-test', + run: () => runNode('triage-test', path.join(root, 'tests', 'triage-records.js')), + covers: ['triage-test'] + }, + { + label: 'mcp-server-test', + run: () => runNode('mcp-server-test', path.join(root, 'tests', 'mcp-server.js')), + covers: ['mcp-server-test', 'mcp-server'] + }, + { + label: 'mcp-schema-test', + run: () => runNode('mcp-schema-test', path.join(root, 'tests', 'mcp-schema.js')), + covers: ['mcp-schema-test'] + }, + { + label: 'mcp-robustness-test', + run: () => runNode('mcp-robustness-test', path.join(root, 'tests', 'mcp-robustness.js')), + covers: ['mcp-robustness-test'] + }, + { + label: 'api-server-test', + run: () => runNode('api-server-test', path.join(root, 'tests', 'api-server.js')), + covers: ['api-server-test', 'api-server'] + }, + { + label: 'api-server-stream-test', + run: () => runNode('api-server-stream-test', path.join(root, 'tests', 'api-server-stream.js')), + covers: ['api-server-stream-test'] + }, + { + label: 'indexer-service-test', + run: () => runNode('indexer-service-test', path.join(root, 'tests', 'indexer-service.js')), + covers: ['indexer-service', 'indexer-service-test'] + }, + { + label: 'piece-assembly-test', + run: () => runNode('piece-assembly-test', path.join(root, 'tests', 'piece-assembly.js')), + covers: ['piece-assembly-test'] + }, + { + label: 'compact-pieces-test', + run: () => runNode('compact-pieces-test', path.join(root, 'tests', 'compact-pieces.js')), + covers: ['compact-pieces-test'] + }, + { + label: 'git-hooks-test', + run: () => runNode('git-hooks-test', path.join(root, 'tests', 'git-hooks.js')), + covers: ['git-hooks-test', 'git-hooks'] + }, + { + label: 'git-meta-test', + run: () => runNode('git-meta-test', path.join(root, 'tests', 'git-meta.js')), + covers: [] + }, + { + label: 'churn-filter-test', + run: () => runNode('churn-filter-test', path.join(root, 'tests', 'churn-filter.js')), + covers: [] + }, + { + label: 'search-filters-test', + run: () => runNode('search-filters-test', path.join(root, 'tests', 'search-filters.js')), + covers: ['search-filters-test'] + }, + { + label: 'ctags-ingest-test', + run: () => runNode('ctags-ingest-test', path.join(root, 'tests', 'ctags-ingest.js')), + covers: ['ctags-ingest', 'ctags-ingest-test'] + }, + { + label: 'scip-ingest-test', + run: () => runNode('scip-ingest-test', path.join(root, 'tests', 'scip-ingest.js')), + covers: ['scip-ingest', 'scip-ingest-test'] + }, + { + label: 'lsif-ingest-test', + run: () => runNode('lsif-ingest-test', path.join(root, 'tests', 'lsif-ingest.js')), + covers: ['lsif-ingest', 'lsif-ingest-test'] + }, + { + label: 'gtags-ingest-test', + run: () => runNode('gtags-ingest-test', path.join(root, 'tests', 'gtags-ingest.js')), + covers: ['gtags-ingest', 'gtags-ingest-test'] + }, + { + label: 'structural-search-test', + run: () => runNode('structural-search-test', path.join(root, 'tests', 'structural-search.js')), + covers: ['structural-search', 'structural-search-test'] + }, + { + label: 'structural-filters-test', 
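+    // Companion to structural-search above, presumably scoped to its
+    // filter flags.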
+ run: () => runNode('structural-filters-test', path.join(root, 'tests', 'structural-filters.js')), + covers: ['structural-filters-test'] + }, + { + label: 'lang-filter-test', + run: () => runNode('lang-filter-test', path.join(root, 'tests', 'lang-filter.js')), + covers: ['lang-filter-test'] + }, + { + label: 'sqlite-auto-backend-test', + run: () => runNode('sqlite-auto-backend-test', path.join(root, 'tests', 'sqlite-auto-backend.js')), + covers: ['sqlite-auto-backend-test'] + }, + { + label: 'sqlite-missing-dep-test', + run: () => runNode('sqlite-missing-dep-test', path.join(root, 'tests', 'sqlite-missing-dep.js')), + covers: ['sqlite-missing-dep-test'] + }, + { + label: 'search-explain-test', + run: () => runNode('search-explain-test', path.join(root, 'tests', 'search-explain.js')), + covers: ['search-explain-test'] + }, + { + label: 'search-rrf-test', + run: () => runNode('search-rrf-test', path.join(root, 'tests', 'search-rrf.js')), + covers: ['search-rrf-test'] + }, + { + label: 'artifact-bak-recovery-test', + run: () => runNode('artifact-bak-recovery-test', path.join(root, 'tests', 'artifact-bak-recovery.js')), + covers: ['artifact-bak-recovery-test'] + }, + { + label: 'encoding-hash-test', + run: () => runNode('encoding-hash-test', path.join(root, 'tests', 'encoding-hash.js')), + covers: ['encoding-hash-test'] + }, + { + label: 'embeddings-cache-identity-test', + run: () => runNode('embeddings-cache-identity-test', path.join(root, 'tests', 'embeddings-cache-identity.js')), + covers: ['embeddings-cache-identity-test'] + }, + { + label: 'embeddings-cache-invalidation-test', + run: () => runNode('embeddings-cache-invalidation-test', path.join(root, 'tests', 'embeddings-cache-invalidation.js')), + covers: ['embeddings-cache-invalidation-test'] + }, + { + label: 'embeddings-dims-mismatch-test', + run: () => runNode('embeddings-dims-mismatch-test', path.join(root, 'tests', 'embeddings-dims-mismatch.js')), + covers: ['embeddings-dims-mismatch-test'] + }, + { + label: 'embeddings-dims-validation-test', + run: () => runNode('embeddings-dims-validation-test', path.join(root, 'tests', 'embeddings-dims-validation.js')), + covers: ['embeddings-dims-validation-test'] + }, + { + label: 'embeddings-sqlite-dense-test', + run: () => runNode('embeddings-sqlite-dense-test', path.join(root, 'tests', 'embeddings-sqlite-dense.js')), + covers: ['embeddings-sqlite-dense-test'] + }, + { + label: 'search-topn-filters-test', + run: () => runNode('search-topn-filters-test', path.join(root, 'tests', 'search-topn-filters.js')), + covers: ['search-topn-filters-test'] + }, + { + label: 'search-determinism-test', + run: () => runNode('search-determinism-test', path.join(root, 'tests', 'search-determinism.js')), + covers: ['search-determinism-test'] + }, + { + label: 'filter-index-artifact-test', + run: () => runNode('filter-index-artifact-test', path.join(root, 'tests', 'filter-index-artifact.js')), + covers: ['filter-index-artifact-test'] + }, + { + label: 'search-symbol-boost-test', + run: () => runNode('search-symbol-boost-test', path.join(root, 'tests', 'search-symbol-boost.js')), + covers: ['search-symbol-boost-test'] + }, + { + label: 'vscode-extension-test', + run: () => runNode('vscode-extension-test', path.join(root, 'tests', 'vscode-extension.js')), + covers: ['vscode-extension-test'] + }, + { + label: 'ext-filter-test', + run: () => runNode('ext-filter-test', path.join(root, 'tests', 'ext-filter.js')), + covers: ['ext-filter-test'] + }, + { + label: 'filter-strictness-test', + run: () => 
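+      // Many lanes from here on use covers: [], i.e. regression tests with no
+      // one-to-one package.json script to mark covered.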
runNode('filter-strictness-test', path.join(root, 'tests', 'filter-strictness.js')), + covers: ['filter-strictness-test'] + }, + { + label: 'filter-index-test', + run: () => runNode('filter-index-test', path.join(root, 'tests', 'filter-index.js')), + covers: ['filter-index-test'] + }, + { + label: 'search-missing-index-test', + run: () => runNode('search-missing-index-test', path.join(root, 'tests', 'search-missing-index.js')), + covers: ['search-missing-index-test'] + }, + { + label: 'search-help-test', + run: () => runNode('search-help-test', path.join(root, 'tests', 'search-help.js')), + covers: ['search-help-test'] + }, + { + label: 'search-removed-flags-test', + run: () => runNode('search-removed-flags-test', path.join(root, 'tests', 'search-removed-flags.js')), + covers: [] + }, + { + label: 'search-missing-flag-values-test', + run: () => runNode('search-missing-flag-values-test', path.join(root, 'tests', 'search-missing-flag-values.js')), + covers: [] + }, + { + label: 'search-windows-path-filter-test', + run: () => runNode('search-windows-path-filter-test', path.join(root, 'tests', 'search-windows-path-filter.js')), + covers: [] + }, + { + label: 'search-explain-symbol-test', + run: () => runNode('search-explain-symbol-test', path.join(root, 'tests', 'search-explain-symbol.js')), + covers: [] + }, + { + label: 'unicode-offset-test', + run: () => runNode('unicode-offset-test', path.join(root, 'tests', 'unicode-offset.js')), + covers: [] + }, + { + label: 'repo-root-test', + run: () => runNode('repo-root-test', path.join(root, 'tests', 'repo-root.js')), + covers: [] + }, + { + label: 'tool-root-test', + run: () => runNode('tool-root-test', path.join(root, 'tests', 'tool-root.js')), + covers: [] + }, + { + label: 'file-size-guard-test', + run: () => runNode('file-size-guard-test', path.join(root, 'tests', 'file-size-guard.js')), + covers: [] + }, + { + label: 'file-line-guard-test', + run: () => runNode('file-line-guard-test', path.join(root, 'tests', 'file-line-guard.js')), + covers: [] + }, + { + label: 'skip-minified-binary-test', + run: () => runNode('skip-minified-binary-test', path.join(root, 'tests', 'skip-minified-binary.js')), + covers: [] + }, + { + label: 'read-failure-skip-test', + run: () => runNode('read-failure-skip-test', path.join(root, 'tests', 'read-failure-skip.js')), + covers: [] + }, + { + label: 'encoding-fallback-test', + run: () => runNode('encoding-fallback-test', path.join(root, 'tests', 'encoding-fallback.js')), + covers: [] + }, + { + label: 'incremental-tokenization-cache-test', + run: () => runNode('incremental-tokenization-cache-test', path.join(root, 'tests', 'incremental-tokenization-cache.js')), + covers: [] + }, + { + label: 'tokenization-buffering-test', + run: () => runNode('tokenization-buffering-test', path.join(root, 'tests', 'tokenization-buffering.js')), + covers: [] + }, + { + label: 'postings-quantize-test', + run: () => runNode('postings-quantize-test', path.join(root, 'tests', 'postings-quantize.js')), + covers: [] + }, + { + label: 'embedding-batch-multipliers-test', + run: () => runNode('embedding-batch-multipliers-test', path.join(root, 'tests', 'embedding-batch-multipliers.js')), + covers: [] + }, + { + label: 'typescript-imports-only-test', + run: () => runNode('typescript-imports-only-test', path.join(root, 'tests', 'typescript-imports-only.js')), + covers: [] + }, + { + label: 'import-priority-test', + run: () => runNode('import-priority-test', path.join(root, 'tests', 'import-priority.js')), + covers: [] + }, + { + label: 
'ignore-overrides-test', + run: () => runNode('ignore-overrides-test', path.join(root, 'tests', 'ignore-overrides.js')), + covers: [] + }, + { + label: 'incremental-cache-signature-test', + run: () => runNode('incremental-cache-signature-test', path.join(root, 'tests', 'incremental-cache-signature.js')), + covers: [] + }, + { + label: 'incremental-reuse-test', + run: () => runNode('incremental-reuse-test', path.join(root, 'tests', 'incremental-reuse.js')), + covers: [] + }, + { + label: 'thread-limits-test', + run: () => runNode('thread-limits-test', path.join(root, 'tests', 'thread-limits.js')), + covers: [] + }, + { + label: 'bench-progress-format-test', + run: () => runNode('bench-progress-format-test', path.join(root, 'tests', 'bench-progress-format.js')), + covers: [] + }, + { + label: 'shard-merge-test', + run: () => runNode('shard-merge-test', path.join(root, 'tests', 'shard-merge.js')), + covers: [] + }, + { + label: 'shard-plan-test', + run: () => runNode('shard-plan-test', path.join(root, 'tests', 'shard-plan.js')), + covers: [] + }, + { + label: 'preprocess-files-test', + run: () => runNode('preprocess-files-test', path.join(root, 'tests', 'preprocess-files.js')), + covers: [] + }, + { + label: 'service-queue-test', + run: () => runNode('service-queue-test', path.join(root, 'tests', 'service-queue.js')), + covers: [] + }, + { + label: 'build-embeddings-cache-test', + run: () => runNode('build-embeddings-cache-test', path.join(root, 'tests', 'build-embeddings-cache.js')), + covers: [] + }, + { + label: 'embedding-batch-autotune-test', + run: () => runNode('embedding-batch-autotune-test', path.join(root, 'tests', 'embedding-batch-autotune.js')), + covers: [] + }, + { + label: 'sqlite-build-indexes-test', + run: () => runNode('sqlite-build-indexes-test', path.join(root, 'tests', 'sqlite-build-indexes.js')), + covers: [] + }, + { + label: 'lmdb-backend-test', + run: () => runNode('lmdb-backend-test', path.join(root, 'tests', 'lmdb-backend.js')), + covers: ['build-lmdb-index', 'lmdb-backend-test'], + coversTierB: ['build-lmdb-index'] + }, + { + label: 'two-stage-state-test', + run: () => runNode('two-stage-state-test', path.join(root, 'tests', 'two-stage-state.js')), + covers: [] + }, + { + label: 'ts-jsx-fixtures', + run: () => runNode('ts-jsx-fixtures', path.join(root, 'tests', 'ts-jsx-fixtures.js')), + covers: [] + }, + { + label: 'python-heuristic-chunking-test', + run: () => runNode( + 'python-heuristic-chunking-test', + path.join(root, 'tests', 'lang', 'python-heuristic-chunking.test.js') + ), + covers: [] + }, + { + label: 'python-imports-test', + run: () => runNode( + 'python-imports-test', + path.join(root, 'tests', 'lang', 'python-imports.test.js') + ), + covers: [] + }, + { + label: 'python-pool-test', + run: () => runNode( + 'python-pool-test', + path.join(root, 'tests', 'lang', 'python-pool.test.js') + ), + covers: [] + }, + { + label: 'js-imports-test', + run: () => runNode('js-imports-test', path.join(root, 'tests', 'lang', 'js-imports.test.js')), + covers: [] + }, + { + label: 'js-chunking-test', + run: () => runNode('js-chunking-test', path.join(root, 'tests', 'lang', 'js-chunking.test.js')), + covers: [] + }, + { + label: 'js-relations-test', + run: () => runNode('js-relations-test', path.join(root, 'tests', 'lang', 'js-relations.test.js')), + covers: [] + }, + { + label: 'chunking-limits-unit-test', + run: () => runNode( + 'chunking-limits-unit-test', + path.join(root, 'tests', 'chunking', 'limits.test.js') + ), + covers: [] + }, + { + label: 
'chunking-yaml-unit-test', + run: () => runNode( + 'chunking-yaml-unit-test', + path.join(root, 'tests', 'chunking', 'yaml.test.js') + ), + covers: [] + }, + { + label: 'chunking-json-unit-test', + run: () => runNode( + 'chunking-json-unit-test', + path.join(root, 'tests', 'chunking', 'json.test.js') + ), + covers: [] + }, + { + label: 'build-runtime-stage-overrides-test', + run: () => runNode( + 'build-runtime-stage-overrides-test', + path.join(root, 'tests', 'build-runtime', 'stage-overrides.test.js') + ), + covers: [] + }, + { + label: 'build-runtime-content-hash-test', + run: () => runNode( + 'build-runtime-content-hash-test', + path.join(root, 'tests', 'build-runtime', 'content-hash.test.js') + ), + covers: [] + }, + { + label: 'indexer-signatures-test', + run: () => runNode( + 'indexer-signatures-test', + path.join(root, 'tests', 'indexer', 'signatures.test.js') + ), + covers: [] + }, + { + label: 'indexer-incremental-plan-test', + run: () => runNode( + 'indexer-incremental-plan-test', + path.join(root, 'tests', 'indexer', 'incremental-plan.test.js') + ), + covers: [] + }, + { + label: 'file-processor-skip-test', + run: () => runNode( + 'file-processor-skip-test', + path.join(root, 'tests', 'file-processor', 'skip.test.js') + ), + covers: [] + }, + { + label: 'file-processor-cached-bundle-test', + run: () => runNode( + 'file-processor-cached-bundle-test', + path.join(root, 'tests', 'file-processor', 'cached-bundle.test.js') + ), + covers: [] + }, + { + label: 'artifacts-token-mode-test', + run: () => runNode( + 'artifacts-token-mode-test', + path.join(root, 'tests', 'artifacts', 'token-mode.test.js') + ), + covers: [] + }, + { + label: 'artifacts-file-meta-test', + run: () => runNode( + 'artifacts-file-meta-test', + path.join(root, 'tests', 'artifacts', 'file-meta.test.js') + ), + covers: [] + }, + { + label: 'language-registry-collectors-test', + run: () => runNode( + 'language-registry-collectors-test', + path.join(root, 'tests', 'language-registry', 'collectors.test.js') + ), + covers: [] + }, + { + label: 'language-registry-selection-test', + run: () => runNode( + 'language-registry-selection-test', + path.join(root, 'tests', 'language-registry', 'selection.test.js') + ), + covers: [] + }, + { + label: 'python-fallback-test', + run: () => runNode('python-fallback-test', path.join(root, 'tests', 'python-fallback.js')), + covers: [] + }, + { + label: 'python-ast-worker-test', + run: () => runNode('python-ast-worker-test', path.join(root, 'tests', 'python-ast-worker.js')), + covers: [] + }, + { + label: 'verify', + run: () => runNode('verify', path.join(root, 'tests', 'smoke.js')), + covers: ['verify'] + }, + { + label: 'fixture-smoke', + run: () => runNode('fixture-smoke', path.join(root, 'tests', 'fixture-smoke.js')), + covers: ['fixture-smoke', 'build-index', 'build-sqlite-index', 'search'], + coversTierB: ['build-index', 'build-sqlite-index'] + }, + { + label: 'fixture-parity', + run: () => runNode('fixture-parity', path.join(root, 'tests', 'fixture-parity.js'), ['--fixtures', 'sample']), + covers: ['fixture-parity'] + }, + { + label: 'fixture-empty', + run: () => runNode('fixture-empty', path.join(root, 'tests', 'fixture-empty.js')), + covers: [] + }, + { + label: 'fixture-eval', + run: () => runNode('fixture-eval', path.join(root, 'tests', 'fixture-eval.js')), + covers: ['fixture-eval'] + }, + { + label: 'eval-quality-test', + run: () => runNode('eval-quality-test', path.join(root, 'tests', 'eval-quality.js')), + covers: ['eval-quality-test', 'eval-run'] + }, + { + label: 
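+    // The *.test.js lanes above live in nested suites (tests/chunking,
+    // tests/indexer, tests/artifacts, ...) and are invoked directly by path.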
'fielded-bm25-test', + run: () => runNode('fielded-bm25-test', path.join(root, 'tests', 'fielded-bm25.js')), + covers: ['fielded-bm25-test'] + }, + { + label: 'artifact-formats-test', + run: () => runNode('artifact-formats-test', path.join(root, 'tests', 'artifact-formats.js')), + covers: ['artifact-formats-test'] + }, + { + label: 'query-intent-test', + run: () => runNode('query-intent-test', path.join(root, 'tests', 'query-intent.js')), + covers: ['query-intent-test'] + }, + { + label: 'context-expansion-test', + run: () => runNode('context-expansion-test', path.join(root, 'tests', 'context-expansion.js')), + covers: ['context-expansion-test'] + }, + { + label: 'query-cache-test', + run: () => runNode('query-cache-test', path.join(root, 'tests', 'query-cache.js')), + covers: ['query-cache-test'] + }, + { + label: 'json-stream-test', + run: () => runNode('json-stream-test', path.join(root, 'tests', 'json-stream.js')), + covers: ['json-stream-test'] + }, + { + label: 'index-cache-test', + run: () => runNode('index-cache-test', path.join(root, 'tests', 'index-cache.js')), + covers: ['index-cache-test'] + }, + { + label: 'sqlite-cache-test', + run: () => runNode('sqlite-cache-test', path.join(root, 'tests', 'sqlite-cache.js')), + covers: ['sqlite-cache-test'] + }, + { + label: 'worker-pool-test', + run: () => runNode('worker-pool-test', path.join(root, 'tests', 'worker-pool.js')), + covers: ['worker-pool-test'] + }, + { + label: 'worker-pool-windows-test', + run: () => runNode('worker-pool-windows-test', path.join(root, 'tests', 'worker-pool-windows.js')), + covers: ['worker-pool-windows-test'] + }, + { + label: 'repo-build-index', + run: () => runNode('build-index', path.join(root, 'build_index.js'), ['--stub-embeddings', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), + covers: ['build-index'] + }, + { + label: 'build-index-all-test', + run: () => runNode('build-index-all-test', path.join(root, 'tests', 'build-index-all.js')), + covers: ['build-index-all-test'] + }, + { + label: 'repo-build-sqlite-index', + run: () => runNode('build-sqlite-index', path.join(root, 'tools', 'build-sqlite-index.js'), ['--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), + covers: ['build-sqlite-index'] + }, + { + label: 'parity', + run: () => runNode( + 'parity', + path.join(root, 'tests', 'parity.js'), + ['--search', path.join(root, 'search.js'), '--no-ann'], + { cwd: fixtureRoot, env: repoEnv } + ), + covers: ['parity'] + }, + { + label: 'repo-search', + run: () => runNode('search', path.join(root, 'search.js'), ['message', '--json', '--no-ann', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), + covers: ['search'] + }, + { + label: 'report-artifacts', + run: () => runNode('report-artifacts', path.join(root, 'tools', 'report-artifacts.js'), ['--json', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), + covers: ['report-artifacts'] + }, + { + label: 'cache-gc-test', + run: () => runNode('cache-gc-test', path.join(root, 'tests', 'cache-gc.js')), + covers: ['cache-gc', 'cache-gc-test'] + }, + { + label: 'cache-lru-test', + run: () => runNode('cache-lru-test', path.join(root, 'tests', 'cache-lru.js')), + covers: ['cache-lru-test'] + }, + { + label: 'discover-test', + run: () => runNode('discover-test', path.join(root, 'tests', 'discover.js')), + covers: ['discover-test'] + }, + { + label: 'watch-debounce-test', + run: () => runNode('watch-debounce-test', path.join(root, 'tests', 'watch-debounce.js')), + covers: ['watch-debounce-test'] + }, + { + label: 
'watch-filter-test', + run: () => runNode('watch-filter-test', path.join(root, 'tests', 'watch-filter.js')), + covers: ['watch-filter-test'] + }, + { + label: 'generate-repo-dict', + run: () => runNode('generate-repo-dict', path.join(root, 'tools', 'generate-repo-dict.js'), ['--min-count', '1', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), + covers: ['generate-repo-dict'] + }, + { + label: 'ci-build', + run: () => runNode('ci-build', path.join(root, 'tools', 'ci-build-artifacts.js'), ['--out', ciOutDir, '--skip-build', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), + covers: ['ci-build'] + }, + { + label: 'ci-restore', + run: () => runNode('ci-restore', path.join(root, 'tools', 'ci-restore-artifacts.js'), ['--from', ciOutDir, '--force', '--repo', fixtureRoot], { cwd: fixtureRoot, env: repoEnv }), + covers: ['ci-restore'] + }, + { + label: 'bootstrap', + run: () => runNode( + 'bootstrap', + path.join(root, 'tools', 'bootstrap.js'), + ['--skip-install', '--skip-dicts', '--skip-index', '--skip-artifacts', '--skip-tooling', '--repo', fixtureRoot], + { cwd: fixtureRoot, env: repoEnv } + ), + covers: ['bootstrap'] + }, + { + label: 'setup-test', + run: () => runNode('setup-test', path.join(root, 'tests', 'setup.js')), + covers: ['setup', 'setup-test'] + }, + { + label: 'setup-index-detection-test', + run: () => runNode('setup-index-detection-test', path.join(root, 'tests', 'setup-index-detection.js')), + covers: ['setup-index-detection-test'] + }, + { + label: 'config-validate-test', + run: () => runNode('config-validate-test', path.join(root, 'tests', 'config-validate.js')), + covers: ['config-validate', 'config-validate-test'] + }, + { + label: 'config-dump-test', + run: () => runNode('config-dump-test', path.join(root, 'tests', 'config-dump.js')), + covers: ['config-dump-test'] + }, + { + label: 'uv-threadpool-env-test', + run: () => runNode('uv-threadpool-env-test', path.join(root, 'tests', 'uv-threadpool-env.js')), + covers: ['uv-threadpool-env-test'] + }, + { + label: 'uv-threadpool-no-override-test', + run: () => runNode('uv-threadpool-no-override-test', path.join(root, 'tests', 'uv-threadpool-no-override.js')), + covers: ['uv-threadpool-no-override-test'] + }, + { + label: 'io-concurrency-cap-test', + run: () => runNode('io-concurrency-cap-test', path.join(root, 'tests', 'io-concurrency-cap.js')), + covers: ['io-concurrency-cap-test'] + }, + { + label: 'profile-config-test', + run: () => runNode('profile-config-test', path.join(root, 'tests', 'profile-config.js')), + covers: ['profile-config-test'] + }, + { + label: 'backend-policy-test', + run: () => runNode('backend-policy-test', path.join(root, 'tests', 'backend-policy.js')), + covers: ['backend-policy-test'] + }, + { + label: 'dict-adaptive-test', + run: () => runNode('dict-adaptive-test', path.join(root, 'tests', 'dict-adaptive.js')), + covers: ['dict-adaptive-test'] + }, + { + label: 'chargram-guardrails-test', + run: () => runNode('chargram-guardrails-test', path.join(root, 'tests', 'chargram-guardrails.js')), + covers: ['chargram-guardrails-test'] + }, + { + label: 'core-api-test', + run: () => runNode('core-api-test', path.join(root, 'tests', 'core-api.js')), + covers: ['core-api-test'] + }, + { + label: 'typescript-parser-selection-test', + run: () => runNode('typescript-parser-selection-test', path.join(root, 'tests', 'typescript-parser-selection.js')), + covers: ['typescript-parser-selection-test'] + }, + { + label: 'script-coverage-harness-test', + run: () => 
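+      // Self-check lane: presumably exercises the tests/script-coverage
+      // modules this harness itself is built from.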
runNode('script-coverage-harness-test', path.join(root, 'tests', 'script-coverage-harness.js')), + covers: ['script-coverage-harness-test'] + }, + { + label: 'cli-test', + run: () => runNode('cli-test', path.join(root, 'tests', 'cli.js')), + covers: ['cli-test'] + } +]; + const mergeDir = context.mergeDir || path.join(baseCacheRoot, 'merge'); + await fsPromises.mkdir(mergeDir, { recursive: true }); + const mergeBase = path.join(mergeDir, 'base.txt'); + const mergeTarget = path.join(mergeDir, 'target.txt'); + await fsPromises.writeFile(mergeBase, 'alpha\nbeta\n'); + await fsPromises.writeFile(mergeTarget, 'beta\ngamma\n'); + + actions.push({ + label: 'merge-append', + run: () => runNode('merge-append', path.join(root, 'tools', 'mergeAppendOnly.js'), [mergeBase, mergeTarget]), + covers: ['merge-append'] + }); + + return actions; +}; diff --git a/tests/script-coverage/paths.js b/tests/script-coverage/paths.js new file mode 100644 index 000000000..f017d7342 --- /dev/null +++ b/tests/script-coverage/paths.js @@ -0,0 +1,30 @@ +import fs from 'node:fs'; +import path from 'node:path'; + +export const loadPackageScripts = (root) => { + const pkg = JSON.parse(fs.readFileSync(path.join(root, 'package.json'), 'utf8')); + return pkg.scripts || {}; +}; + +export const resolveFailureLogRoot = ({ root, logDirOverride }) => { + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + return logDirOverride + ? path.resolve(logDirOverride) + : path.join(root, 'tests', '.logs', timestamp); +}; + +export const resolveScriptCoveragePaths = ({ root, logDirOverride }) => { + const baseCacheRoot = path.join(root, 'tests', '.cache', 'script-coverage'); + const repoCacheRoot = path.join(baseCacheRoot, 'repo'); + const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); + const failureLogRoot = resolveFailureLogRoot({ root, logDirOverride }); + return { + baseCacheRoot, + repoCacheRoot, + fixtureRoot, + failureLogRoot, + ciOutDir: path.join(baseCacheRoot, 'ci-artifacts'), + mergeDir: path.join(baseCacheRoot, 'merge'), + shellWorkDir: path.join(baseCacheRoot, 'shell') + }; +}; diff --git a/tests/script-coverage/report.js b/tests/script-coverage/report.js new file mode 100644 index 000000000..e7ddd10c1 --- /dev/null +++ b/tests/script-coverage/report.js @@ -0,0 +1,148 @@ +const TIER_B_DEFAULT = [ + 'build-index', + 'build-sqlite-index', + 'build-lmdb-index', + 'compact-sqlite-index' +]; + +const createCoverageEntry = () => ({ status: 'pending', via: null, reason: null }); + +export const createCoverageState = ({ scriptNames }) => { + const coverage = new Map(scriptNames.map((name) => [name, createCoverageEntry()])); + const tierBRequired = new Set(TIER_B_DEFAULT.filter((name) => coverage.has(name))); + const tierBCoverage = new Map( + Array.from(tierBRequired, (name) => [name, createCoverageEntry()]) + ); + const unknownCovers = new Set(); + + const markCovered = (name, via) => { + if (!coverage.has(name)) { + unknownCovers.add(name); + return; + } + const entry = coverage.get(name); + if (entry.status === 'pending') { + coverage.set(name, { status: 'covered', via, reason: null }); + } + }; + + const markSkipped = (name, reason) => { + if (!coverage.has(name)) return; + coverage.set(name, { status: 'skipped', via: null, reason }); + }; + + const markTierBCovered = (name, via) => { + if (!tierBCoverage.has(name)) { + unknownCovers.add(name); + return; + } + const entry = tierBCoverage.get(name); + if (entry.status === 'pending') { + tierBCoverage.set(name, { status: 'covered', via, reason: null }); 
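+      // First cover wins, mirroring markCovered: a later action listing the
+      // same Tier B script leaves the original `via` label intact.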
+ } + }; + + return { + coverage, + tierBCoverage, + tierBRequired, + unknownCovers, + markCovered, + markSkipped, + markTierBCovered + }; +}; + +export const applyActionCoverage = (state, action) => { + const covers = Array.isArray(action.covers) ? action.covers : []; + for (const name of covers) { + state.markCovered(name, action.label); + } + const tierCovers = Array.isArray(action.coversTierB) ? action.coversTierB : []; + for (const name of tierCovers) { + state.markTierBCovered(name, action.label); + } +}; + +export const applyDefaultSkips = (state) => { + if (state.coverage.has('script-coverage-test')) { + state.markCovered('script-coverage-test', 'self'); + } + state.markSkipped('test-all', 'aggregates script-coverage-test and bench'); + state.markSkipped('test-all-no-bench', 'aggregates script-coverage-test without bench'); + state.markSkipped('download-models', 'requires network model download'); + state.markSkipped('bench', 'benchmarks are long-running'); + state.markSkipped('bench-ann', 'benchmarks are long-running'); + state.markSkipped('bench-dict-seg', 'benchmarks are long-running'); + state.markSkipped('bench-score-strategy', 'benchmarks are long-running'); + state.markSkipped('bench-micro', 'benchmarks are long-running'); + state.markSkipped('compare-models', 'benchmark/perf evaluation'); + state.markSkipped('bench-language', 'benchmarks are long-running'); + state.markSkipped('smoke:section1', 'smoke lanes are run manually'); + state.markSkipped('smoke:retrieval', 'smoke lanes are run manually'); + state.markSkipped('smoke:services', 'smoke lanes are run manually'); + state.markSkipped('smoke:workers', 'smoke lanes are run manually'); + state.markSkipped('smoke:embeddings', 'smoke lanes are run manually'); + state.markSkipped('smoke:sqlite', 'smoke lanes are run manually'); + state.markSkipped('watch-index', 'watch mode runs until interrupted'); + state.markSkipped('format', 'modifies working tree'); + state.markSkipped('lint', 'requires npm install and project lint config'); + + for (const name of state.coverage.keys()) { + if (name.startsWith('bench-language:')) { + state.markSkipped(name, 'bench-language variants are long-running'); + } + } +}; + +export const finalizeCoverage = (state) => { + const missing = []; + const skipped = []; + const covered = []; + for (const [name, entry] of state.coverage.entries()) { + if (entry.status === 'pending') missing.push(name); + if (entry.status === 'skipped') skipped.push({ name, reason: entry.reason }); + if (entry.status === 'covered') covered.push({ name, via: entry.via }); + } + + const missingTierB = []; + const coveredTierB = []; + for (const [name, entry] of state.tierBCoverage.entries()) { + if (entry.status === 'pending') missingTierB.push(name); + if (entry.status === 'covered') coveredTierB.push({ name, via: entry.via }); + } + + return { + missing, + missingTierB, + skipped, + covered, + coveredTierB, + unknownCovers: Array.from(state.unknownCovers) + }; +}; + +export const reportCoverage = (summary) => { + if (summary.unknownCovers.length) { + console.error(`Unknown coverage script names: ${summary.unknownCovers.join(', ')}`); + return false; + } + if (summary.missing.length || summary.missingTierB.length) { + if (summary.missing.length) { + console.error(`Missing coverage for: ${summary.missing.join(', ')}`); + } + if (summary.missingTierB.length) { + console.error(`Missing Tier B coverage for: ${summary.missingTierB.join(', ')}`); + } + return false; + } + + console.log(`script coverage: ${summary.covered.length} 
covered, ${summary.skipped.length} skipped`); + console.log(`tier B coverage: ${summary.coveredTierB.length} covered, ${summary.missingTierB.length} missing`); + if (summary.skipped.length) { + for (const entry of summary.skipped) { + console.log(`- skipped ${entry.name}: ${entry.reason}`); + } + } + return true; +}; diff --git a/tests/script-coverage/runner.js b/tests/script-coverage/runner.js new file mode 100644 index 000000000..fc108c06e --- /dev/null +++ b/tests/script-coverage/runner.js @@ -0,0 +1,130 @@ +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +export const resolveRetries = ({ argvRetries, envRetries, defaultRetries = 2 }) => { + if (Number.isFinite(argvRetries)) return Math.max(0, argvRetries); + if (Number.isFinite(envRetries)) return Math.max(0, envRetries); + return defaultRetries; +}; + +export const prepareCoverageDirs = async ({ baseCacheRoot, repoCacheRoot, failureLogRoot }) => { + await fsPromises.rm(baseCacheRoot, { recursive: true, force: true }); + await fsPromises.mkdir(repoCacheRoot, { recursive: true }); + await fsPromises.mkdir(failureLogRoot, { recursive: true }); +}; + +const sanitizeLabel = (label) => label.replace(/[^a-z0-9-_]+/gi, '_').slice(0, 120); + +const writeFailureLog = (failureLogRoot, label, attempt, cmd, args, options, result) => { + const safeLabel = sanitizeLabel(label); + const logPath = path.join(failureLogRoot, `${safeLabel}.attempt-${attempt}.log`); + const lines = [ + `label: ${label}`, + `attempt: ${attempt}`, + `command: ${[cmd, ...args].join(' ')}`, + `cwd: ${options.cwd || process.cwd()}`, + `exit: ${result.status ?? 'null'}`, + '' + ]; + if (result.stdout) { + lines.push('--- stdout ---', String(result.stdout)); + } + if (result.stderr) { + lines.push('--- stderr ---', String(result.stderr)); + } + fs.writeFileSync(logPath, lines.join('\n'), 'utf8'); + return logPath; +}; + +export const createCommandRunner = ({ retries, failureLogRoot }) => { + const run = (label, cmd, args, options = {}) => { + const maxAttempts = retries + 1; + const normalizeOutput = (value) => { + if (!value) return ''; + let text = String(value); + text = text.replace(/\r\n/g, '\n'); + text = text.replace(/\n{3,}/g, '\n\n'); + text = text.replace(/^\n+/, '\n'); + return text; + }; + for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { + const { env: optionEnv, ...spawnOptions } = options; + const env = { ...process.env, ...optionEnv }; + if (!env.PAIROFCLEATS_TEST_LOG_DIR) { + env.PAIROFCLEATS_TEST_LOG_DIR = failureLogRoot; + } + const result = spawnSync(cmd, args, { + encoding: 'utf8', + maxBuffer: 50 * 1024 * 1024, + stdio: 'pipe', + env, + ...spawnOptions + }); + if (result.stdout) process.stdout.write(normalizeOutput(result.stdout)); + if (result.stderr) process.stderr.write(normalizeOutput(result.stderr)); + if (result.status === 0) return; + const logPath = writeFailureLog(failureLogRoot, label, attempt, cmd, args, options, result); + console.error(`Failed: ${label} (attempt ${attempt}/${maxAttempts}). 
Log: ${logPath}`); + if (attempt < maxAttempts) { + console.error(`Retrying: ${label}`); + } + } + process.exit(1); + }; + + const runNode = (label, scriptPath, args = [], options = {}) => { + run(label, process.execPath, [scriptPath, ...args], options); + }; + + return { run, runNode }; +}; + +export const runShellScripts = async ({ root, baseCacheRoot, run }) => { + const shellScripts = [ + path.join(root, 'merge-history.sh'), + path.join(root, 'merge-no-results.sh'), + path.join(root, 'merge-metrics.sh'), + path.join(root, 'tools', 'merge-history.sh'), + path.join(root, 'tools', 'merge-no-results.sh'), + path.join(root, 'tools', 'merge-metrics.sh'), + path.join(root, 'tools', 'merge-agentinfo-notes.sh'), + path.join(root, 'tools', 'merge-agentinfo-index.sh') + ]; + + const bashCheck = spawnSync('bash', ['-c', 'echo ok'], { encoding: 'utf8' }); + const bashAvailable = bashCheck.status === 0; + const jqCheck = bashAvailable ? spawnSync('bash', ['-c', 'command -v jq'], { encoding: 'utf8' }) : null; + const jqAvailable = jqCheck && jqCheck.status === 0; + const toPosixPath = (value) => (process.platform === 'win32' ? value.replace(/\\/g, '/') : value); + const bashPathCheck = bashAvailable + ? spawnSync('bash', ['-c', `cd "${toPosixPath(root)}"`], { encoding: 'utf8' }) + : null; + const bashAccessible = bashPathCheck && bashPathCheck.status === 0; + + if (bashAvailable && bashAccessible) { + const shellWorkDir = path.join(baseCacheRoot, 'shell'); + await fsPromises.mkdir(shellWorkDir, { recursive: true }); + const base = path.join(shellWorkDir, 'base.json'); + const ours = path.join(shellWorkDir, 'ours.json'); + const theirs = path.join(shellWorkDir, 'theirs.json'); + await fsPromises.writeFile(base, JSON.stringify({ file: { md: 1, code: 1 } }, null, 2)); + await fsPromises.writeFile(ours, JSON.stringify({ file: { md: 2, code: 0 } }, null, 2)); + await fsPromises.writeFile(theirs, JSON.stringify({ file: { md: 3, code: 2 } }, null, 2)); + + for (const scriptPath of shellScripts) { + if (!fs.existsSync(scriptPath)) continue; + if (scriptPath.endsWith('merge-metrics.sh') && !jqAvailable) { + console.log(`[skip] ${scriptPath} (jq not available)`); + continue; + } + const args = [scriptPath, base, ours, theirs].map(toPosixPath); + run('shell-script', 'bash', args, { cwd: root }); + } + } else if (!bashAvailable) { + console.log('[skip] shell scripts (bash not available)'); + } else { + console.log('[skip] shell scripts (bash cannot access workspace path)'); + } +}; diff --git a/tests/search-contract.js b/tests/search-contract.js new file mode 100644 index 000000000..1607d4082 --- /dev/null +++ b/tests/search-contract.js @@ -0,0 +1,85 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'search-contract'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.writeFile( + path.join(repoRoot, 'README.md'), + '# Sample\n\nalpha bravo\n' +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { 
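+    // stdio is inherited so index-build output streams straight to the test
+    // log; only the exit status is asserted below.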
cwd: repoRoot, env, stdio: 'inherit' } +); + +if (buildResult.status !== 0) { + console.error('Failed: build index for search contract'); + process.exit(buildResult.status ?? 1); +} + +const searchPath = path.join(root, 'search.js'); +const result = spawnSync( + process.execPath, + [searchPath, 'alpha', '--mode', 'prose', '--json', '--backend', 'memory', '--no-ann', '--repo', repoRoot], + { cwd: repoRoot, env, encoding: 'utf8' } +); + +if (result.status !== 0) { + console.error('Failed: search contract run'); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 1); +} + +let payload = null; +try { + payload = JSON.parse(result.stdout || '{}'); +} catch { + console.error('Failed: search contract returned invalid JSON'); + process.exit(1); +} + +if (!payload || typeof payload !== 'object') { + console.error('Failed: search contract payload missing'); + process.exit(1); +} + +for (const key of ['backend', 'code', 'prose', 'records', 'stats']) { + if (!(key in payload)) { + console.error(`Failed: search contract missing ${key}`); + process.exit(1); + } +} + +if (!Array.isArray(payload.prose) || payload.prose.length === 0) { + console.error('Failed: search contract expected prose hits'); + process.exit(1); +} + +const hit = payload.prose[0]; +if (!hit || !hit.file) { + console.error('Failed: search contract hit missing file'); + process.exit(1); +} +if (!Number.isFinite(hit.startLine)) { + console.error('Failed: search contract hit missing startLine'); + process.exit(1); +} + +console.log('search contract tests passed'); diff --git a/tests/search-determinism.js b/tests/search-determinism.js new file mode 100644 index 000000000..0b093b890 --- /dev/null +++ b/tests/search-determinism.js @@ -0,0 +1,96 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'search-determinism'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const content = 'alpha beta gamma\nalpha beta gamma\n'; +const files = ['alpha-1.txt', 'alpha-2.txt', 'alpha-3.txt']; +for (const file of files) { + await fsPromises.writeFile(path.join(repoRoot, file), content); +} + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); +if (buildResult.status !== 0) { + console.error('Failed: build index'); + process.exit(buildResult.status ?? 1); +} + +const searchPath = path.join(root, 'search.js'); +const searchArgs = [ + searchPath, + 'alpha', + '--mode', + 'prose', + '--top', + '3', + '--ann', + '--explain', + '--json', + '--backend', + 'memory', + '--repo', + repoRoot +]; + +function runSearch(label) { + const result = spawnSync(process.execPath, searchArgs, { + cwd: repoRoot, + env, + encoding: 'utf8' + }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 
1); + } + let payload = null; + try { + payload = JSON.parse(result.stdout || '{}'); + } catch { + console.error(`Failed: ${label} returned invalid JSON`); + process.exit(1); + } + return payload; +} + +const first = runSearch('search first'); +const second = runSearch('search second'); + +const firstHits = first.prose || []; +const secondHits = second.prose || []; +if (!firstHits.length || !secondHits.length) { + console.error('Expected prose hits for determinism test.'); + process.exit(1); +} +for (const hit of firstHits) { + if (!hit.scoreBreakdown) { + console.error('Expected score breakdown for determinism test.'); + process.exit(1); + } +} + +if (JSON.stringify(firstHits) !== JSON.stringify(secondHits)) { + console.error('Determinism test failed: search results differ between runs.'); + process.exit(1); +} + +console.log('search determinism tests passed'); diff --git a/tests/search-explain-symbol.js b/tests/search-explain-symbol.js new file mode 100644 index 000000000..7f3896097 --- /dev/null +++ b/tests/search-explain-symbol.js @@ -0,0 +1,63 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'explain-symbol'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.writeFile( + path.join(repoRoot, 'symbol.js'), + 'export function boostExample() { return "symbol boost test"; }\n' +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub', + PAIROFCLEATS_WORKER_POOL: 'off' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); +if (buildResult.status !== 0) { + console.error('Failed: build_index'); + process.exit(buildResult.status ?? 1); +} + +const searchResult = spawnSync( + process.execPath, + [ + path.join(root, 'search.js'), + 'boostExample', + '--mode', + 'code', + '--explain', + '--no-ann', + '--repo', + repoRoot + ], + { encoding: 'utf8', env } +); +if (searchResult.status !== 0) { + console.error('Search failed.'); + if (searchResult.stderr) console.error(searchResult.stderr.trim()); + process.exit(searchResult.status ?? 
1); +} + +const output = searchResult.stdout || ''; +if (!output.includes('Symbol')) { + console.error('Expected explain output to include symbol boost details.'); + process.exit(1); +} + +console.log('explain symbol test passed'); diff --git a/tests/search-explain.js b/tests/search-explain.js new file mode 100644 index 000000000..2ffcd08a9 --- /dev/null +++ b/tests/search-explain.js @@ -0,0 +1,62 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'search-explain'); +const cacheRoot = path.join(tempRoot, 'cache'); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', fixtureRoot], + { env, stdio: 'inherit' } ); +if (buildResult.status !== 0) { + console.error('search explain test failed: build_index failed'); + process.exit(buildResult.status ?? 1); +} + +const stripAnsi = (value) => value.replace(/\u001b\[[0-9;]*m/g, ''); + +const runSearch = (args, label) => { + const result = spawnSync( + process.execPath, + [path.join(root, 'search.js'), 'return', '--mode', 'code', '--no-ann', '--repo', fixtureRoot, ...args], + { env, encoding: 'utf8' } + ); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 1); + } + return stripAnsi(`${result.stdout || ''}${result.stderr || ''}`); +}; + +const explainOutput = runSearch(['--explain'], 'explain'); +if (!explainOutput.includes('Score:')) { + console.error('Explain output missing Score breakdown.'); + process.exit(1); +} +if (!explainOutput.includes('Sparse:')) { + console.error('Explain output missing Sparse breakdown.'); + process.exit(1); +} + +const whyOutput = runSearch(['--why'], 'why'); +if (!whyOutput.includes('Score:')) { + console.error('Why output missing Score breakdown.'); + process.exit(1); +} + +console.log('search explain tests passed'); diff --git a/tests/search-filters.js b/tests/search-filters.js index 84e13c366..cc46ceb55 100644 --- a/tests/search-filters.js +++ b/tests/search-filters.js @@ -56,6 +56,25 @@ runGit( { GIT_AUTHOR_DATE: dateNew, GIT_COMMITTER_DATE: dateNew } ); +await fsPromises.writeFile(path.join(repoRoot, 'CaseFile.TXT'), 'AlphaCase alpha\n'); +runGit(['add', '.'], 'git add CaseFile'); +runGit( + ['commit', '-m', 'add case file', '--author', 'Casey <casey@example.com>', '--date', dateNew], + 'git commit CaseFile', + { GIT_AUTHOR_DATE: dateNew, GIT_COMMITTER_DATE: dateNew } +); + +await fsPromises.writeFile( + path.join(repoRoot, 'sample.js'), + 'const equal = (a, b) => a && b;\nfunction check(a, b) {\n return a && b;\n}\n' +); +runGit(['add', '.'], 'git add sample.js'); +runGit( + ['commit', '-m', 'add sample.js', '--author', 'Dana <dana@example.com>', '--date', dateNew], + 'git commit sample.js', + { GIT_AUTHOR_DATE: dateNew, GIT_COMMITTER_DATE: dateNew } +); + const env = { ...process.env, PAIROFCLEATS_CACHE_ROOT: cacheRoot, @@ -73,11 +92,15 @@ if (buildResult.status !== 0) { } const searchPath = path.join(root, 'search.js'); +const branchName = (() => { + const result = spawnSync('git', ['rev-parse', 
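+  // `git rev-parse --abbrev-ref HEAD` prints the current branch name; returning
+  // null on failure lets the branch-filter assertions below be skipped safely.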
'--abbrev-ref', 'HEAD'], { cwd: repoRoot, encoding: 'utf8' }); + return result.status === 0 ? result.stdout.trim() : null; +})(); -function runSearch(query, args, label) { +function runSearch(query, args, label, mode = 'prose') { const result = spawnSync( process.execPath, - [searchPath, query, '--mode', 'prose', '--json', '--no-ann', '--repo', repoRoot, ...args], + [searchPath, query, '--mode', mode, '--json', '--no-ann', '--repo', repoRoot, ...args], { cwd: repoRoot, env, encoding: 'utf8' } ); if (result.status !== 0) { @@ -88,7 +111,8 @@ function runSearch(query, args, label) { return JSON.parse(result.stdout || '{}'); } -const extractFiles = (payload) => new Set((payload.prose || []).map((hit) => path.basename(hit.file || ''))); +const extractFiles = (payload, key = 'prose') => + new Set((payload[key] || []).map((hit) => path.basename(hit.file || ''))); const negativeToken = runSearch('alpha -gamma', [], 'negative token'); const negativeTokenFiles = extractFiles(negativeToken); @@ -145,4 +169,48 @@ if (!modifiedSinceFiles.has('beta.txt') || modifiedSinceFiles.has('alpha.txt')) process.exit(1); } +if (branchName) { + const branchMatch = runSearch('alpha', ['--branch', branchName], 'branch filter'); + if (!(branchMatch.prose || []).length) { + console.error('branch filter returned no results for current branch.'); + process.exit(1); + } + const branchMiss = runSearch('alpha', ['--branch', 'no-such-branch'], 'branch mismatch'); + if ((branchMiss.prose || []).length) { + console.error('branch mismatch should return no results.'); + process.exit(1); + } +} + +const caseInsensitiveFile = runSearch('alpha', ['--file', 'casefile.txt'], 'case-insensitive file'); +if (!extractFiles(caseInsensitiveFile).has('CaseFile.TXT')) { + console.error('case-insensitive file filter failed.'); + process.exit(1); +} +const caseSensitiveFile = runSearch('alpha', ['--file', 'casefile.txt', '--case-file'], 'case-sensitive file'); +if (extractFiles(caseSensitiveFile).has('CaseFile.TXT')) { + console.error('case-sensitive file filter should not match.'); + process.exit(1); +} +const regexFile = runSearch('alpha', ['--file', '/casefile\\.txt/'], 'regex file filter'); +if (!extractFiles(regexFile).has('CaseFile.TXT')) { + console.error('regex file filter failed.'); + process.exit(1); +} +const caseInsensitiveToken = runSearch('AlphaCase', [], 'case-insensitive token'); +if (!extractFiles(caseInsensitiveToken).has('CaseFile.TXT')) { + console.error('case-insensitive token match failed.'); + process.exit(1); +} +const caseSensitiveToken = runSearch('AlphaCase', ['--case-tokens'], 'case-sensitive token'); +if (extractFiles(caseSensitiveToken).has('CaseFile.TXT')) { + console.error('case-sensitive token match should not match.'); + process.exit(1); +} +const punctuationSearch = runSearch('&&', [], 'punctuation token', 'code'); +if (!extractFiles(punctuationSearch, 'code').has('sample.js')) { + console.error('punctuation token match failed.'); + process.exit(1); +} + console.log('Search filter tests passed'); diff --git a/tests/search-help.js b/tests/search-help.js new file mode 100644 index 000000000..653454c34 --- /dev/null +++ b/tests/search-help.js @@ -0,0 +1,21 @@ +#!/usr/bin/env node +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const result = spawnSync(process.execPath, [path.join(root, 'search.js')], { encoding: 'utf8' }); +if (result.status === 0) { + console.error('Expected search help to exit non-zero with no query.'); + process.exit(1); +} + 
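+// Usage/help text can land on stdout or stderr depending on the CLI's error
+// path, so both streams are concatenated before scanning for documented flags.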
+const output = `${result.stdout || ''}${result.stderr || ''}`; +const requiredFlags = ['--calls', '--uses', '--author', '--import', '--explain']; +for (const flag of requiredFlags) { + if (!output.includes(flag)) { + console.error(`Help output missing flag: ${flag}`); + process.exit(1); + } +} + +console.log('search help test passed'); diff --git a/tests/search-missing-flag-values.js b/tests/search-missing-flag-values.js new file mode 100644 index 000000000..8240cb71d --- /dev/null +++ b/tests/search-missing-flag-values.js @@ -0,0 +1,35 @@ +#!/usr/bin/env node +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const searchPath = path.join(root, 'search.js'); + +function runFlag(flag) { + return spawnSync( + process.execPath, + [searchPath, 'test', flag], + { encoding: 'utf8' } + ); +} + +const cases = [ + { flag: '--type', name: '--type' }, + { flag: '--author', name: '--author' }, + { flag: '--import', name: '--import' } +]; + +for (const entry of cases) { + const result = runFlag(entry.flag); + if (result.status === 0) { + console.error(`Expected non-zero exit for ${entry.name}.`); + process.exit(1); + } + const output = `${result.stderr || ''}${result.stdout || ''}`; + if (!output.includes(`Missing value for ${entry.name}`)) { + console.error(`Expected missing value message for ${entry.name}.`); + process.exit(1); + } +} + +console.log('missing flag values test passed'); diff --git a/tests/search-missing-index.js b/tests/search-missing-index.js new file mode 100644 index 000000000..27d477871 --- /dev/null +++ b/tests/search-missing-index.js @@ -0,0 +1,38 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'search-missing-index'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const result = spawnSync( + process.execPath, + [path.join(root, 'search.js'), 'alpha', '--mode', 'code', '--no-ann', '--repo', repoRoot], + { encoding: 'utf8', env } +); + +if (result.status === 0) { + console.error('Expected search to fail when index is missing.'); + process.exit(1); +} + +const output = `${result.stdout || ''}${result.stderr || ''}`; +if (!output.includes('build-index')) { + console.error('Expected missing index message to include build-index hint.'); + process.exit(1); +} + +console.log('missing index test passed'); diff --git a/tests/search-removed-flags.js b/tests/search-removed-flags.js new file mode 100644 index 000000000..1c8b78d7d --- /dev/null +++ b/tests/search-removed-flags.js @@ -0,0 +1,34 @@ +#!/usr/bin/env node +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const searchPath = path.join(root, 'search.js'); + +function runFlag(flag) { + return spawnSync( + process.execPath, + [searchPath, 'test', flag], + { encoding: 'utf8' } + ); +} + +const cases = [ + { flag: '--human', label: 'human' }, + { flag: '--headline', label: 'headline' } +]; + +for (const entry of cases) { + const result = runFlag(entry.flag); + if (result.status === 0) { + 
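+    // A removed flag must fail fast rather than being silently ignored; the
+    // checks below require the word "removed" plus the offending flag name.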
console.error(`Expected non-zero exit for ${entry.flag}.`); + process.exit(1); + } + const output = `${result.stderr || ''}${result.stdout || ''}`; + if (!output.toLowerCase().includes('removed') || !output.includes(entry.flag)) { + console.error(`Expected actionable error for ${entry.flag}.`); + process.exit(1); + } +} + +console.log('removed flags test passed'); diff --git a/tests/search-rrf.js b/tests/search-rrf.js new file mode 100644 index 000000000..8ddbdc4c4 --- /dev/null +++ b/tests/search-rrf.js @@ -0,0 +1,73 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'search-rrf'); +const cacheRoot = path.join(tempRoot, 'cache'); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', fixtureRoot], + { env, stdio: 'inherit' } +); +if (buildResult.status !== 0) { + console.error('search rrf test failed: build_index failed'); + process.exit(buildResult.status ?? 1); +} + +const result = spawnSync( + process.execPath, + [ + path.join(root, 'search.js'), + 'return', + '--mode', + 'code', + '--ann', + '--json', + '--repo', + fixtureRoot + ], + { env, encoding: 'utf8' } +); + +if (result.status !== 0) { + console.error('search rrf test failed: search returned error'); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 
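+  // Reciprocal rank fusion typically scores a hit as the sum over rankings of
+  // 1 / (k + rank_i), with k a smoothing constant (60 in the original paper).
+  // The assertions below only check that the fused ("rrf") path was taken,
+  // not any particular k, so they stay robust to tuning.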
1); +} + +let payload = null; +try { + payload = JSON.parse(result.stdout || '{}'); +} catch (err) { + console.error('search rrf test failed: invalid JSON output'); + process.exit(1); +} + +const hit = payload?.code?.[0]; +if (!payload?.stats?.annActive) { + console.error('search rrf test failed: annActive was false'); + process.exit(1); +} +if (!hit?.scoreBreakdown?.rrf) { + console.error('search rrf test failed: scoreBreakdown.rrf missing'); + process.exit(1); +} +if (hit.scoreType !== 'rrf') { + console.error(`search rrf test failed: expected scoreType rrf, got ${hit.scoreType}`); + process.exit(1); +} + +console.log('search rrf tests passed'); diff --git a/tests/search-symbol-boost.js b/tests/search-symbol-boost.js new file mode 100644 index 000000000..46df0ecff --- /dev/null +++ b/tests/search-symbol-boost.js @@ -0,0 +1,77 @@ +import assert from 'node:assert/strict'; +import { createSearchPipeline } from '../src/retrieval/pipeline.js'; + +const idx = { + chunkMeta: [ + { + id: 0, + file: 'a.js', + start: 0, + end: 10, + kind: 'FunctionDeclaration', + name: 'foo', + tokens: ['alpha'] + }, + { + id: 1, + file: 'b.js', + start: 0, + end: 10, + kind: 'FunctionDeclaration', + name: 'bar', + tokens: ['alpha'] + } + ], + fileRelations: new Map([ + ['a.js', { exports: ['foo'] }], + ['b.js', { exports: [] }] + ]) +}; + +const searchPipeline = createSearchPipeline({ + useSqlite: false, + sqliteFtsRequested: false, + sqliteFtsNormalize: false, + sqliteFtsProfile: 'balanced', + sqliteFtsWeights: null, + bm25K1: 1.2, + bm25B: 0.75, + postingsConfig: { + enablePhraseNgrams: false, + enableChargrams: false, + phraseMinN: 2, + phraseMaxN: 3, + chargramMinN: 3, + chargramMaxN: 3 + }, + queryTokens: ['alpha'], + phraseNgramSet: null, + phraseRange: null, + symbolBoost: { + enabled: true, + definitionWeight: 1.4, + exportWeight: 1.2 + }, + filters: {}, + filtersActive: false, + topN: 2, + annEnabled: false, + scoreBlend: { enabled: false }, + minhashMaxDocs: 0, + vectorAnnState: null, + vectorAnnUsed: {}, + buildCandidateSetSqlite: () => null, + getTokenIndexForQuery: () => null, + rankSqliteFts: () => [], + rankVectorAnnSqlite: () => [] +}); + +const results = searchPipeline(idx, 'code', null); +assert.equal(results.length, 2, 'expected two results'); +assert.equal(results[0].name, 'foo', 'expected exported definition to rank first'); +assert.ok(results[0].score > results[1].score, 'expected boosted score to win'); +assert.ok(results[0].scoreBreakdown?.symbol?.definition, 'expected definition flag'); +assert.ok(results[0].scoreBreakdown?.symbol?.export, 'expected export flag'); +assert.ok(results[0].scoreBreakdown?.symbol?.factor > 1, 'expected symbol boost factor'); + +console.log('symbol boost test passed'); diff --git a/tests/search-topn-filters.js b/tests/search-topn-filters.js new file mode 100644 index 000000000..16648a33e --- /dev/null +++ b/tests/search-topn-filters.js @@ -0,0 +1,102 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'search-topn-filters'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const allowedFiles = ['allowed-1.txt', 'allowed-2.txt']; +const blockedCount = 12; 
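+// Fixture design: each blocked file repeats "alpha" 200 times, so it would beat
+// the allowed files on raw term frequency. If --file filtering ran after the
+// top-2 cutoff, no allowed file could surface; the assertions below therefore
+// prove filtering happens before truncation on both backends.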
+const allowedContent = 'alpha beta gamma\nalpha beta\n'; +const blockedContent = `${Array.from({ length: 200 }, () => 'alpha').join(' ')}\n`; + +for (const file of allowedFiles) { + await fsPromises.writeFile(path.join(repoRoot, file), allowedContent); +} +for (let i = 0; i < blockedCount; i += 1) { + await fsPromises.writeFile(path.join(repoRoot, `blocked-${i + 1}.txt`), blockedContent); +} + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +function run(args, label, options = {}) { + const result = spawnSync(process.execPath, args, { + cwd: repoRoot, + env, + stdio: 'inherit', + ...options + }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 1); + } + return result; +} + +run([path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], 'build index'); +run([path.join(root, 'tools', 'build-sqlite-index.js'), '--repo', repoRoot], 'build sqlite index'); + +const searchPath = path.join(root, 'search.js'); + +function runSearch(backend) { + const result = spawnSync( + process.execPath, + [ + searchPath, + 'alpha', + '--mode', + 'prose', + '--top', + '2', + '--file', + 'allowed', + '--json', + '--backend', + backend, + '--no-ann', + '--repo', + repoRoot + ], + { cwd: repoRoot, env, encoding: 'utf8' } + ); + if (result.status !== 0) { + console.error(`Failed: search (${backend})`); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 1); + } + let payload = null; + try { + payload = JSON.parse(result.stdout || '{}'); + } catch { + console.error(`Failed: search (${backend}) returned invalid JSON`); + process.exit(1); + } + const hits = payload.prose || []; + if (hits.length !== 2) { + console.error(`Expected 2 results for ${backend}, got ${hits.length}`); + process.exit(1); + } + for (const hit of hits) { + const fileBase = path.basename(hit.file || ''); + if (!fileBase.startsWith('allowed-')) { + console.error(`Unexpected file in ${backend} results: ${fileBase}`); + process.exit(1); + } + } +} + +runSearch('memory'); +runSearch('sqlite-fts'); + +console.log('search top-N filter tests passed'); diff --git a/tests/search-windows-path-filter.js b/tests/search-windows-path-filter.js new file mode 100644 index 000000000..dfa91a344 --- /dev/null +++ b/tests/search-windows-path-filter.js @@ -0,0 +1,78 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'windows-path-filter'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(path.join(repoRoot, 'src', 'nested'), { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.writeFile( + path.join(repoRoot, 'src', 'nested', 'util.js'), + 'export function winPathFilter() { return "windows path filter"; }\n' +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub', + PAIROFCLEATS_WORKER_POOL: 'off' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); +if (buildResult.status !== 0) { + console.error('Failed: build_index'); + process.exit(buildResult.status ?? 
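+  // The queries below pass Windows-style separators (src\nested\util.js); the
+  // --file and --path filters are expected to normalize them against the
+  // forward-slash relative paths stored in the index.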
1); +} + +function runSearch(extraArgs) { + const result = spawnSync( + process.execPath, + [ + path.join(root, 'search.js'), + 'windows path filter', + '--json', + '--mode', + 'code', + '--no-ann', + '--repo', + repoRoot, + ...extraArgs + ], + { encoding: 'utf8', env } + ); + if (result.status !== 0) { + console.error('Search failed.'); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 1); + } + try { + return JSON.parse(result.stdout || '{}'); + } catch { + console.error('Search output was not valid JSON.'); + process.exit(1); + } +} + +const filePayload = runSearch(['--file', 'src\\nested\\util.js']); +if (!Array.isArray(filePayload.code) || filePayload.code.length === 0) { + console.error('Expected results for Windows-style --file filter.'); + process.exit(1); +} + +const pathPayload = runSearch(['--path', 'src\\nested']); +if (!Array.isArray(pathPayload.code) || pathPayload.code.length === 0) { + console.error('Expected results for Windows-style --path filter.'); + process.exit(1); +} + +console.log('windows path filter test passed'); diff --git a/tests/segment-pipeline.js b/tests/segment-pipeline.js new file mode 100644 index 000000000..cebcdb556 --- /dev/null +++ b/tests/segment-pipeline.js @@ -0,0 +1,104 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import path from 'node:path'; +import { discoverSegments, chunkSegments } from '../src/index/segments.js'; +import { extractComments, normalizeCommentConfig } from '../src/index/comments.js'; +import { buildLineIndex } from '../src/shared/lines.js'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'segments'); + +const assert = (condition, message) => { + if (condition) return; + console.error(message); + process.exit(1); +}; + +const mdPath = path.join(fixtureRoot, 'docs', 'guide.md'); +const mdText = fs.readFileSync(mdPath, 'utf8'); +const mdSegments = discoverSegments({ + text: mdText, + ext: '.md', + relPath: 'docs/guide.md', + mode: 'prose', + segmentsConfig: { inlineCodeSpans: true } +}); +assert(mdSegments.some((seg) => seg.type === 'config' && seg.meta?.frontmatter), 'Expected markdown frontmatter segment.'); +const fencedSegments = mdSegments.filter((seg) => seg.type === 'embedded' && seg.meta?.fenceInfo); +assert(fencedSegments.some((seg) => String(seg.meta.fenceInfo).includes('js')), 'Expected markdown JS fenced segment.'); +assert(fencedSegments.some((seg) => String(seg.meta.fenceInfo).includes('json')), 'Expected markdown JSON fenced segment.'); +const inlineSegments = mdSegments.filter((seg) => seg.meta?.inlineCode); +assert(inlineSegments.length === 2, `Expected 2 inline code segments, got ${inlineSegments.length}.`); + +const mdChunks = chunkSegments({ + text: mdText, + ext: '.md', + relPath: 'docs/guide.md', + mode: 'prose', + segments: mdSegments, + lineIndex: buildLineIndex(mdText), + context: {} +}); +let lastStart = -1; +for (const chunk of mdChunks) { + assert(chunk.start >= 0 && chunk.end <= mdText.length, 'Markdown chunk range invalid.'); + assert(chunk.segment, 'Markdown chunk missing segment metadata.'); + assert(chunk.start >= lastStart, 'Markdown chunks are out of order.'); + lastStart = chunk.start; +} + +const vuePath = path.join(fixtureRoot, 'src', 'component.vue'); +const vueText = fs.readFileSync(vuePath, 'utf8'); +const vueSegments = discoverSegments({ + text: vueText, + ext: '.vue', + relPath: 'src/component.vue', + mode: 'code' +}); +assert(vueSegments.some((seg) => seg.meta?.block === 'template'), 'Expected Vue 
template segment.'); +assert(vueSegments.some((seg) => seg.meta?.block === 'script' || seg.meta?.block === 'scriptSetup'), 'Expected Vue script segment.'); +assert(vueSegments.some((seg) => seg.meta?.block === 'style'), 'Expected Vue style segment.'); + +const sveltePath = path.join(fixtureRoot, 'src', 'widget.svelte'); +const svelteText = fs.readFileSync(sveltePath, 'utf8'); +const svelteSegments = discoverSegments({ + text: svelteText, + ext: '.svelte', + relPath: 'src/widget.svelte', + mode: 'code' +}); +assert(svelteSegments.some((seg) => seg.meta?.block === 'script'), 'Expected Svelte script segment.'); +assert(svelteSegments.some((seg) => seg.meta?.block === 'style'), 'Expected Svelte style segment.'); +assert(svelteSegments.some((seg) => seg.meta?.block === 'template'), 'Expected Svelte template segment.'); + +const astroPath = path.join(fixtureRoot, 'src', 'page.astro'); +const astroText = fs.readFileSync(astroPath, 'utf8'); +const astroSegments = discoverSegments({ + text: astroText, + ext: '.astro', + relPath: 'src/page.astro', + mode: 'code' +}); +assert(astroSegments.some((seg) => seg.meta?.block === 'frontmatter'), 'Expected Astro frontmatter segment.'); +assert(astroSegments.some((seg) => seg.meta?.block === 'template'), 'Expected Astro template segment.'); +assert(astroSegments.some((seg) => seg.meta?.block === 'style'), 'Expected Astro style segment.'); + +const commentPath = path.join(fixtureRoot, 'src', 'comments.js'); +const commentText = fs.readFileSync(commentPath, 'utf8'); +const commentConfig = normalizeCommentConfig({ extract: 'all', includeLicense: false }); +const commentData = extractComments({ + text: commentText, + ext: '.js', + languageId: 'javascript', + lineIndex: buildLineIndex(commentText), + config: commentConfig +}); +assert(commentData.comments.some((comment) => comment.type === 'doc'), 'Expected doc comment extracted.'); +assert(commentData.comments.some((comment) => comment.type === 'inline'), 'Expected inline comment extracted.'); +assert(commentData.comments.some((comment) => comment.type === 'block'), 'Expected block comment extracted.'); +assert(commentData.comments.some((comment) => comment.type === 'license'), 'Expected license comment extracted.'); +assert(!commentData.comments.some((comment) => comment.text.includes('eslint-disable')), 'Expected linter comment to be skipped.'); +assert(!commentData.comments.some((comment) => comment.text.includes('generated by')), 'Expected generated comment to be skipped.'); +assert(commentData.configSegments.some((segment) => segment.languageId === 'json' && segment.meta?.source === 'comment'), 'Expected JSON config segment from comment.'); + +console.log('segment pipeline tests passed'); diff --git a/tests/service-queue.js b/tests/service-queue.js new file mode 100644 index 000000000..bfe90e7da --- /dev/null +++ b/tests/service-queue.js @@ -0,0 +1,49 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { + ensureQueueDir, + enqueueJob, + claimNextJob, + completeJob, + queueSummary +} from '../tools/service/queue.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'service-queue'); +const queueDir = path.join(tempRoot, 'queue'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await ensureQueueDir(queueDir); + +const baseJob = { + createdAt: new Date().toISOString(), + repo: '/tmp/repo', + mode: 'all', + reason: 'test' +}; + +await enqueueJob(queueDir, { ...baseJob, id: 'job-index' }, null, 
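+  // Assumed queue API shape: enqueueJob(queueDir, job, options, lane). The null
+  // third argument appears to be an unused options slot, and the trailing lane
+  // name keeps the 'index' and 'embeddings' queues independent (see the
+  // per-lane queueSummary calls below).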
'index'); +await enqueueJob(queueDir, { ...baseJob, id: 'job-embed' }, null, 'embeddings'); + +const summaryIndex = await queueSummary(queueDir, 'index'); +const summaryEmbed = await queueSummary(queueDir, 'embeddings'); +if (summaryIndex.total !== 1 || summaryEmbed.total !== 1) { + console.error('Queue summary counts mismatch'); + process.exit(1); +} + +const job = await claimNextJob(queueDir, 'index'); +if (!job || job.status !== 'running') { + console.error('Expected queued job to transition to running'); + process.exit(1); +} +await completeJob(queueDir, job.id, 'failed', { exitCode: 1 }, 'index'); + +const summaryAfter = await queueSummary(queueDir, 'index'); +if (summaryAfter.failed !== 1) { + console.error('Expected failed job count to be 1'); + process.exit(1); +} + +console.log('service queue test passed'); diff --git a/tests/setup-index-detection.js b/tests/setup-index-detection.js new file mode 100644 index 000000000..3ba5a69c6 --- /dev/null +++ b/tests/setup-index-detection.js @@ -0,0 +1,122 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'setup-index-detection'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; + +await fsPromises.writeFile(path.join(repoRoot, '.pairofcleats.json'), '{}'); +await fsPromises.writeFile(path.join(repoRoot, 'README.md'), 'setup detection fixture\n'); + +const userConfig = loadUserConfig(repoRoot); +const codeIndexDir = getIndexDir(repoRoot, 'code', userConfig); + +async function resetIndexDir() { + await fsPromises.rm(codeIndexDir, { recursive: true, force: true }); + await fsPromises.mkdir(codeIndexDir, { recursive: true }); +} + +function runSetup(label) { + const result = spawnSync( + process.execPath, + [ + path.join(root, 'tools', 'setup.js'), + '--repo', + repoRoot, + '--non-interactive', + '--json', + '--skip-install', + '--skip-dicts', + '--skip-models', + '--skip-extensions', + '--skip-tooling', + '--skip-index', + '--skip-sqlite', + '--skip-artifacts' + ], + { + cwd: repoRoot, + encoding: 'utf8', + env: { ...process.env, PAIROFCLEATS_CACHE_ROOT: cacheRoot } + } + ); + if (result.status !== 0) { + console.error(`setup index detection failed: ${label}`); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 
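+    // The scenarios below fabricate each chunk_meta layout by hand (plain JSON,
+    // JSONL, meta.json plus part shards, and an empty parts list that must read
+    // as not ready) so readiness detection is exercised without a real build.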
1); + } + let payload = null; + try { + payload = JSON.parse(result.stdout || '{}'); + } catch { + console.error(`setup index detection failed: ${label} (invalid JSON output)`); + process.exit(1); + } + return payload; +} + +const scenarios = [ + { + label: 'chunk_meta.json', + build: async () => { + await fsPromises.writeFile(path.join(codeIndexDir, 'chunk_meta.json'), '[]'); + }, + expectReady: true + }, + { + label: 'chunk_meta.jsonl', + build: async () => { + await fsPromises.writeFile(path.join(codeIndexDir, 'chunk_meta.jsonl'), '{}\n'); + }, + expectReady: true + }, + { + label: 'chunk_meta.meta.json + parts', + build: async () => { + const partsDir = path.join(codeIndexDir, 'chunk_meta.parts'); + await fsPromises.mkdir(partsDir, { recursive: true }); + const partName = 'chunk_meta.part-00000.jsonl'; + await fsPromises.writeFile(path.join(partsDir, partName), '{}\n'); + const meta = { parts: [path.join('chunk_meta.parts', partName)], count: 1 }; + await fsPromises.writeFile( + path.join(codeIndexDir, 'chunk_meta.meta.json'), + JSON.stringify(meta, null, 2) + ); + }, + expectReady: true + }, + { + label: 'chunk_meta.meta.json without parts', + build: async () => { + await fsPromises.writeFile( + path.join(codeIndexDir, 'chunk_meta.meta.json'), + JSON.stringify({ parts: [], count: 0 }, null, 2) + ); + }, + expectReady: false + } +]; + +for (const scenario of scenarios) { + await resetIndexDir(); + await scenario.build(); + const payload = runSetup(scenario.label); + const ready = payload?.steps?.index?.ready === true; + if (ready !== scenario.expectReady) { + console.error( + `setup index detection failed: ${scenario.label} expected ready=${scenario.expectReady}, got ${ready}` + ); + process.exit(1); + } +} + +console.log('setup index detection tests passed'); diff --git a/tests/shard-merge.js b/tests/shard-merge.js new file mode 100644 index 000000000..0a8e52333 --- /dev/null +++ b/tests/shard-merge.js @@ -0,0 +1,98 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; +import { MAX_JSON_BYTES, loadChunkMeta, loadTokenPostings } from '../src/shared/artifact-io.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'shard-merge'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRootA = path.join(tempRoot, 'cache-a'); +const cacheRootB = path.join(tempRoot, 'cache-b'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true }); +await fsPromises.mkdir(path.join(repoRoot, 'lib'), { recursive: true }); +await fsPromises.mkdir(cacheRootA, { recursive: true }); +await fsPromises.mkdir(cacheRootB, { recursive: true }); + +await fsPromises.writeFile(path.join(repoRoot, 'src', 'alpha.js'), 'export const alpha = 1;\n'); +await fsPromises.writeFile(path.join(repoRoot, 'lib', 'beta.py'), 'def beta():\n return 2\n'); + +const configPath = path.join(repoRoot, '.pairofcleats.json'); +const writeConfig = async (shardsEnabled) => { + await fsPromises.writeFile( + configPath, + JSON.stringify({ + indexing: { + fileListSampleSize: 10, + shards: { + enabled: shardsEnabled, + maxWorkers: 1, + minFiles: 1 + }, + treeSitter: { enabled: false } + } + }, null, 2) + ); +}; + +const runBuild = (cacheRoot, label) => { + const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' + 
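+    // Build A (shards disabled) and build B (shards enabled) must emit
+    // byte-identical chunk metadata and token postings; the JSON string
+    // comparisons below treat any ordering drift in the shard merge as failure.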
}; + const result = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } + ); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 1); + } +}; + +const readIndex = (cacheRoot) => { + const previousCacheRoot = process.env.PAIROFCLEATS_CACHE_ROOT; + process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; + const userConfig = loadUserConfig(repoRoot); + const codeDir = getIndexDir(repoRoot, 'code', userConfig); + const chunks = loadChunkMeta(codeDir, { maxBytes: MAX_JSON_BYTES }); + const tokenIndex = loadTokenPostings(codeDir, { maxBytes: MAX_JSON_BYTES }); + if (previousCacheRoot === undefined) { + delete process.env.PAIROFCLEATS_CACHE_ROOT; + } else { + process.env.PAIROFCLEATS_CACHE_ROOT = previousCacheRoot; + } + return { chunks, tokenIndex }; +}; + +await writeConfig(false); +runBuild(cacheRootA, 'baseline build'); +const baseline = readIndex(cacheRootA); + +await writeConfig(true); +runBuild(cacheRootB, 'sharded build'); +const sharded = readIndex(cacheRootB); + +if (baseline.chunks.length !== sharded.chunks.length) { + console.error('Shard merge mismatch: chunk counts differ'); + process.exit(1); +} +if (JSON.stringify(baseline.chunks) !== JSON.stringify(sharded.chunks)) { + console.error('Shard merge mismatch: chunk metadata differs'); + process.exit(1); +} +if (JSON.stringify(baseline.tokenIndex.vocab) !== JSON.stringify(sharded.tokenIndex.vocab)) { + console.error('Shard merge mismatch: token vocab differs'); + process.exit(1); +} +if (JSON.stringify(baseline.tokenIndex.postings) !== JSON.stringify(sharded.tokenIndex.postings)) { + console.error('Shard merge mismatch: token postings differ'); + process.exit(1); +} + +console.log('shard merge test passed'); diff --git a/tests/shard-plan.js b/tests/shard-plan.js new file mode 100644 index 000000000..20b0174be --- /dev/null +++ b/tests/shard-plan.js @@ -0,0 +1,73 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import path from 'node:path'; +import { planShards } from '../src/index/build/shards.js'; + +const makeEntry = (rel) => ({ + rel, + abs: path.join('C:\\repo', rel) +}); + +const entriesA = [ + makeEntry('src/sub/a.js'), + makeEntry('src/sub/b.js'), + makeEntry('src/root.js') +]; +const shardsA = planShards(entriesA, { + mode: 'code', + dirDepth: 2, + lineCounts: new Map() +}); +const labelsA = new Set(shardsA.map((shard) => shard.label)); +assert.ok(labelsA.has('src/javascript'), 'parent shard missing'); +assert.ok(labelsA.has('./javascript'), 'root shard missing'); +assert.ok(!Array.from(labelsA).some((label) => label.startsWith('src/sub/'))); +const srcShardA = shardsA.find((shard) => shard.label === 'src/javascript'); +assert.equal(srcShardA.entries.length, 2); + +const entriesB = []; +const lineCountsB = new Map(); +for (let i = 0; i < 10; i += 1) { + const rel = `src/large${i}/file.js`; + entriesB.push(makeEntry(rel)); + lineCountsB.set(rel, 100); +} +entriesB.push(makeEntry('src/huge/file.js')); +lineCountsB.set('src/huge/file.js', 60); +entriesB.push(makeEntry('src/small/file.js')); +lineCountsB.set('src/small/file.js', 1); +const shardsB = planShards(entriesB, { + mode: 'code', + dirDepth: 2, + lineCounts: lineCountsB +}); +const labelsB = new Set(shardsB.map((shard) => shard.label)); +assert.ok(labelsB.has('src/huge/javascript'), 'huge-file shard should stay separate'); +assert.ok(!labelsB.has('src/small/javascript'), 'small shard should merge to parent'); +const 
parentB = shardsB.find((shard) => shard.label === 'src/javascript'); +assert.ok(parentB, 'parent shard should exist for merged subdirs'); +assert.ok(parentB.entries.some((entry) => entry.rel === 'src/small/file.js')); + +const entriesC = []; +const lineCountsC = new Map(); +const extensions = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']; +for (const ext of extensions) { + const rel = `file.${ext}`; + entriesC.push(makeEntry(rel)); + lineCountsC.set(rel, 10); +} +for (let i = 0; i < 10; i += 1) { + const rel = `big${i}.js`; + entriesC.push(makeEntry(rel)); + lineCountsC.set(rel, 10); +} +const shardsC = planShards(entriesC, { + mode: 'code', + dirDepth: 1, + lineCounts: lineCountsC +}); +const splitParts = shardsC.filter((shard) => shard.label.startsWith('./javascript#')); +assert.equal(splitParts.length, 10, 'expected split shards for large group'); +assert.ok(splitParts.every((shard) => shard.splitFrom === './javascript')); + +console.log('shard-plan test passed.'); diff --git a/tests/skip-minified-binary.js b/tests/skip-minified-binary.js new file mode 100644 index 000000000..aad6a6ae2 --- /dev/null +++ b/tests/skip-minified-binary.js @@ -0,0 +1,94 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, getMetricsDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'skip-minified-binary'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +const configPath = path.join(repoRoot, '.pairofcleats.json'); +await fsPromises.writeFile( + configPath, + JSON.stringify({ + indexing: { + maxFileBytes: 200000, + fileListSampleSize: 20, + treeSitter: { enabled: false } + } + }, null, 2) +); + +const minifiedPath = path.join(repoRoot, 'app.min.js'); +const binaryPath = path.join(repoRoot, 'binary.png'); +const normalPath = path.join(repoRoot, 'normal.js'); +await fsPromises.writeFile(minifiedPath, 'function minified(){return 42;}'); +await fsPromises.writeFile(normalPath, 'function ok() { return 1; }\n'); +await fsPromises.copyFile( + path.join(root, 'tests', 'fixtures', 'binary', 'sample.png'), + binaryPath +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: repoRoot, env, stdio: 'inherit' } +); +if (buildResult.status !== 0) { + console.error('Failed: build_index'); + process.exit(buildResult.status ?? 
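+  // The fixture leans on the indexer's skip heuristics: app.min.js (a
+  // single-line .min.js file) should be skipped as "minified" and sample.png as
+  // "binary", with both reasons surfaced in .filelists.json and the metrics.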
1); +} + +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +const userConfig = loadUserConfig(repoRoot); +const codeDir = getIndexDir(repoRoot, 'code', userConfig); +const fileListsPath = path.join(codeDir, '.filelists.json'); +if (!fs.existsSync(fileListsPath)) { + console.error('Missing .filelists.json'); + process.exit(1); +} +const fileLists = JSON.parse(await fsPromises.readFile(fileListsPath, 'utf8')); +const skippedSample = fileLists?.skipped?.sample; +if (!Array.isArray(skippedSample)) { + console.error('Skipped sample payload is not an array'); + process.exit(1); +} +const minifiedSkip = skippedSample.find((entry) => entry?.file && entry.file.endsWith('app.min.js')); +if (!minifiedSkip || minifiedSkip.reason !== 'minified') { + console.error('Expected minified skip entry for app.min.js'); + process.exit(1); +} +const binarySkip = skippedSample.find((entry) => entry?.file && entry.file.endsWith('binary.png')); +if (!binarySkip || binarySkip.reason !== 'binary') { + console.error('Expected binary skip entry for binary.png'); + process.exit(1); +} + +const metricsDir = getMetricsDir(repoRoot, userConfig); +const metricsPath = path.join(metricsDir, 'index-code.json'); +if (!fs.existsSync(metricsPath)) { + console.error('Missing index-code metrics'); + process.exit(1); +} +const metrics = JSON.parse(await fsPromises.readFile(metricsPath, 'utf8')); +const minifiedCount = metrics?.files?.skippedByReason?.minified || 0; +const binaryCount = metrics?.files?.skippedByReason?.binary || 0; +if (minifiedCount < 1 || binaryCount < 1) { + console.error('Expected skippedByReason.minified and skippedByReason.binary to be >= 1'); + process.exit(1); +} + +console.log('minified/binary skip test passed'); diff --git a/tests/smoke-embeddings.js b/tests/smoke-embeddings.js new file mode 100644 index 000000000..ab60e474b --- /dev/null +++ b/tests/smoke-embeddings.js @@ -0,0 +1,24 @@ +#!/usr/bin/env node +import path from 'node:path'; +import { cleanup, runNode, root } from './smoke-utils.js'; + +const cacheRoots = [ + path.join(root, 'tests', '.cache', 'build-embeddings-cache'), + path.join(root, 'tests', '.cache', 'embeddings-dims-mismatch') +]; + +let failure = null; +try { + await cleanup(cacheRoots); + runNode('embeddings-cache', path.join(root, 'tests', 'build-embeddings-cache.js')); + runNode('embeddings-dims-mismatch', path.join(root, 'tests', 'embeddings-dims-mismatch.js')); +} catch (err) { + console.error(err?.message || err); + failure = err; +} +await cleanup(cacheRoots); + +if (failure) { + process.exit(failure.exitCode ?? 
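+  // Smoke lanes share one pattern: clean cache dirs before and after, run each
+  // child test via runNode (which attaches `exitCode` to the thrown error, see
+  // tests/smoke-utils.js), and exit with the child's real code.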
1); +} +console.log('smoke embeddings passed'); diff --git a/tests/smoke-retrieval.js b/tests/smoke-retrieval.js new file mode 100644 index 000000000..6f4f7bcc4 --- /dev/null +++ b/tests/smoke-retrieval.js @@ -0,0 +1,156 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { cleanup, root } from './smoke-utils.js'; + +const tempRoot = path.join(root, 'tests', '.cache', 'smoke-retrieval'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const searchPath = path.join(root, 'search.js'); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const fail = (message, exitCode = 1) => { + const error = new Error(message); + error.exitCode = exitCode; + throw error; +}; + +const runNode = (label, args, options = {}) => { + const result = spawnSync(process.execPath, args, { env, encoding: 'utf8', ...options }); + if (result.status !== 0) { + const stderr = result.stderr ? result.stderr.trim() : ''; + if (stderr) console.error(stderr); + fail(`Failed: ${label}`, result.status ?? 1); + } + return result; +}; + +let failure = null; +try { + await cleanup([tempRoot]); + await fsPromises.mkdir(cacheRoot, { recursive: true }); + await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true }); + + const build = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { env, stdio: 'inherit' } + ); + if (build.status !== 0) { + fail('smoke retrieval failed: build_index failed', build.status ?? 1); + } + + const helpResult = spawnSync(process.execPath, [searchPath], { encoding: 'utf8' }); + if (helpResult.status === 0) { + fail('Expected search help to exit non-zero with no query.'); + } + const helpOutput = `${helpResult.stdout || ''}${helpResult.stderr || ''}`; + const requiredFlags = ['--calls', '--uses', '--author', '--import', '--explain']; + for (const flag of requiredFlags) { + if (!helpOutput.includes(flag)) { + fail(`Help output missing flag: ${flag}`); + } + } + + const rrfResult = runNode( + 'search rrf', + [searchPath, 'return', '--mode', 'code', '--ann', '--json', '--repo', repoRoot] + ); + let rrfPayload = null; + try { + rrfPayload = JSON.parse(rrfResult.stdout || '{}'); + } catch { + fail('search rrf test failed: invalid JSON output'); + } + const rrfHit = rrfPayload?.code?.[0]; + if (!rrfPayload?.stats?.annActive) { + fail('search rrf test failed: annActive was false'); + } + if (!rrfHit?.scoreBreakdown?.rrf) { + fail('search rrf test failed: scoreBreakdown.rrf missing'); + } + if (rrfHit.scoreType !== 'rrf') { + fail(`search rrf test failed: expected scoreType rrf, got ${rrfHit.scoreType}`); + } + + const filterResult = runNode( + 'search filters', + [ + searchPath, + 'return', + '--mode', + 'code', + '--json', + '--no-ann', + '--repo', + repoRoot, + '--file', + 'src/index.js' + ] + ); + const filterPayload = JSON.parse(filterResult.stdout || '{}'); + const filterHits = filterPayload?.code || []; + if (!filterHits.length) { + fail('search filter test failed: no results returned'); + } + const badFilterHit = filterHits.find((hit) => !(hit.file || '').replace(/\\/g, '/').endsWith('src/index.js')); + if (badFilterHit) { + fail('search filter test failed: file filter mismatch'); + } + + const stripAnsi = (value) => value.replace(/\u001b\[[0-9;]*m/g, ''); + const 
explainResult = runNode( + 'search explain', + [searchPath, 'return', '--mode', 'code', '--no-ann', '--repo', repoRoot, '--explain'] + ); + const explainOutput = stripAnsi(`${explainResult.stdout || ''}${explainResult.stderr || ''}`); + if (!explainOutput.includes('Score:')) { + fail('Explain output missing Score breakdown.'); + } + if (!explainOutput.includes('Sparse:')) { + fail('Explain output missing Sparse breakdown.'); + } + + const blendConfig = { + search: { + scoreBlend: { + enabled: true, + sparseWeight: 0.6, + annWeight: 0.4 + } + } + }; + await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + `${JSON.stringify(blendConfig, null, 2)}\n` + ); + + const blendResult = runNode( + 'search blend', + [searchPath, 'return', '--mode', 'code', '--ann', '--json', '--repo', repoRoot] + ); + const blendPayload = JSON.parse(blendResult.stdout || '{}'); + const blendHit = blendPayload?.code?.[0]; + if (!blendHit?.scoreBreakdown?.blend) { + fail('search blend test failed: scoreBreakdown.blend missing'); + } + if (blendHit.scoreType !== 'blend') { + fail(`search blend test failed: expected scoreType blend, got ${blendHit.scoreType}`); + } +} catch (err) { + console.error(err?.message || err); + failure = err; +} + +await cleanup([tempRoot]); +if (failure) { + process.exit(failure.exitCode ?? 1); +} +console.log('smoke retrieval passed'); diff --git a/tests/smoke-section1.js b/tests/smoke-section1.js new file mode 100644 index 000000000..1b0094a6c --- /dev/null +++ b/tests/smoke-section1.js @@ -0,0 +1,24 @@ +#!/usr/bin/env node +import path from 'node:path'; +import { cleanup, runNode, root } from './smoke-utils.js'; + +const cacheRoots = [ + path.join(root, 'tests', '.cache', 'core-api'), + path.join(root, 'tests', '.cache', 'api-server') +]; + +let failure = null; +try { + await cleanup(cacheRoots); + runNode('core-api', path.join(root, 'tests', 'core-api.js')); + runNode('api-server', path.join(root, 'tests', 'api-server.js')); +} catch (err) { + console.error(err?.message || err); + failure = err; +} +await cleanup(cacheRoots); + +if (failure) { + process.exit(failure.exitCode ?? 1); +} +console.log('smoke section1 passed'); diff --git a/tests/smoke-services.js b/tests/smoke-services.js new file mode 100644 index 000000000..a9e5ebea5 --- /dev/null +++ b/tests/smoke-services.js @@ -0,0 +1,20 @@ +#!/usr/bin/env node +import path from 'node:path'; +import { cleanup, runNode, root } from './smoke-utils.js'; + +const cacheRoots = [path.join(root, 'tests', '.cache', 'mcp-server')]; + +let failure = null; +try { + await cleanup(cacheRoots); + runNode('mcp-server', path.join(root, 'tests', 'mcp-server.js')); +} catch (err) { + console.error(err?.message || err); + failure = err; +} +await cleanup(cacheRoots); + +if (failure) { + process.exit(failure.exitCode ?? 
1); +} +console.log('smoke services passed'); diff --git a/tests/smoke-sqlite.js b/tests/smoke-sqlite.js new file mode 100644 index 000000000..04b373a91 --- /dev/null +++ b/tests/smoke-sqlite.js @@ -0,0 +1,24 @@ +#!/usr/bin/env node +import path from 'node:path'; +import { cleanup, runNode, root } from './smoke-utils.js'; + +const cacheRoots = [ + path.join(root, 'tests', '.cache', 'sqlite-incremental'), + path.join(root, 'tests', '.cache', 'sqlite-ann-fallback') +]; + +let failure = null; +try { + await cleanup(cacheRoots); + runNode('sqlite-incremental', path.join(root, 'tests', 'sqlite-incremental.js')); + runNode('sqlite-ann-fallback', path.join(root, 'tests', 'sqlite-ann-fallback.js')); +} catch (err) { + console.error(err?.message || err); + failure = err; +} +await cleanup(cacheRoots); + +if (failure) { + process.exit(failure.exitCode ?? 1); +} +console.log('smoke sqlite passed'); diff --git a/tests/smoke-utils.js b/tests/smoke-utils.js new file mode 100644 index 000000000..a75e38d1b --- /dev/null +++ b/tests/smoke-utils.js @@ -0,0 +1,23 @@ +import fsPromises from 'node:fs/promises'; +import { spawnSync } from 'node:child_process'; + +export const root = process.cwd(); + +export async function cleanup(paths) { + for (const dir of paths) { + await fsPromises.rm(dir, { recursive: true, force: true }); + } +} + +export function runNode(label, scriptPath, args = [], options = {}) { + const result = spawnSync(process.execPath, [scriptPath, ...args], { + stdio: 'inherit', + ...options + }); + if (result.status !== 0) { + const error = new Error(`Failed: ${label}`); + error.exitCode = result.status ?? 1; + throw error; + } + return result; +} diff --git a/tests/smoke-workers.js b/tests/smoke-workers.js new file mode 100644 index 000000000..f0f959293 --- /dev/null +++ b/tests/smoke-workers.js @@ -0,0 +1,21 @@ +#!/usr/bin/env node +import path from 'node:path'; +import { cleanup, runNode, root } from './smoke-utils.js'; + +const cacheRoots = [path.join(root, 'tests', '.cache', 'language-fidelity')]; + +let failure = null; +try { + await cleanup(cacheRoots); + runNode('worker-pool', path.join(root, 'tests', 'worker-pool.js')); + runNode('language-fidelity', path.join(root, 'tests', 'language-fidelity.js')); +} catch (err) { + console.error(err?.message || err); + failure = err; +} +await cleanup(cacheRoots); + +if (failure) { + process.exit(failure.exitCode ?? 
1); +} +console.log('smoke workers passed'); diff --git a/tests/smoke.js b/tests/smoke.js index d57bde131..b3f47cf10 100644 --- a/tests/smoke.js +++ b/tests/smoke.js @@ -1,19 +1,19 @@ #!/usr/bin/env node import fs from 'node:fs'; import path from 'node:path'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; import { getDictionaryPaths, getDictConfig, getIndexDir, loadUserConfig, resolveSqlitePaths } from '../tools/dict-utils.js'; import { normalizePostingsConfig } from '../src/shared/postings-config.js'; import { getVectorExtensionConfig, resolveVectorExtensionPath } from '../tools/vector-extension.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['require-index', 'require-sqlite', 'require-dicts'], - default: { - 'require-index': false, - 'require-sqlite': false, - 'require-dicts': false +const argv = createCli({ + scriptName: 'verify', + options: { + 'require-index': { type: 'boolean', default: false }, + 'require-sqlite': { type: 'boolean', default: false }, + 'require-dicts': { type: 'boolean', default: false } } -}); +}).parse(); const root = process.cwd(); let failures = 0; diff --git a/tests/sqlite-ann-extension.js b/tests/sqlite-ann-extension.js index 7144022db..78febfcdf 100644 --- a/tests/sqlite-ann-extension.js +++ b/tests/sqlite-ann-extension.js @@ -40,8 +40,8 @@ const config = { cache: { root: cacheRoot }, sqlite: { use: true, - annMode: 'extension', vectorExtension: { + annMode: 'extension', path: extensionPath } }, @@ -144,7 +144,8 @@ await fsPromises.rm(deletableFile, { force: true }); run([path.join(root, 'build_index.js'), '--incremental', '--stub-embeddings', '--repo', repoRoot], 'build index (incremental)'); run([path.join(root, 'tools', 'build-sqlite-index.js'), '--incremental', '--mode', 'code', '--repo', repoRoot], 'build sqlite index (incremental)'); -const dbAfter = new Database(sqlitePaths.codePath, { readonly: true }); +const sqlitePathsAfter = resolveSqlitePaths(repoRoot, userConfig); +const dbAfter = new Database(sqlitePathsAfter.codePath, { readonly: true }); try { dbAfter.loadExtension(extensionPath); } catch (err) { diff --git a/tests/sqlite-ann-fallback.js b/tests/sqlite-ann-fallback.js new file mode 100644 index 000000000..7f5adca3d --- /dev/null +++ b/tests/sqlite-ann-fallback.js @@ -0,0 +1,88 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'sqlite-ann-fallback'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); +const missingExtensionPath = path.join(tempRoot, 'missing', 'vec0-missing.node'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.writeFile( + path.join(repoRoot, 'src', 'alpha.js'), + 'export const alpha = () => "ann_fallback_token";\n' +); + +const config = { + cache: { root: cacheRoot }, + dictionary: { languages: ['en'] }, + sqlite: { + use: true, + vectorExtension: { + annMode: 'extension', + path: missingExtensionPath + } + } +}; +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify(config, null, 2) + '\n' +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const runNode = (label, args) => { + 
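+  // vectorExtension.path points at a file that never exists, so loading the
+  // sqlite ANN extension must fail; the expectation is a quiet fallback to a
+  // non-extension ANN backend that still returns hits (asserted below).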
const result = spawnSync(process.execPath, args, { cwd: repoRoot, env, stdio: 'inherit' }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 1); + } +}; + +runNode('build_index', [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot]); +runNode('build_sqlite', [path.join(root, 'tools', 'build-sqlite-index.js'), '--repo', repoRoot]); + +const searchResult = spawnSync( + process.execPath, + [path.join(root, 'search.js'), 'ann_fallback_token', '--backend', 'sqlite', '--ann', '--json', '--repo', repoRoot], + { env, encoding: 'utf8' } +); +if (searchResult.status !== 0) { + console.error('sqlite ann fallback test failed: search returned error'); + if (searchResult.stderr) console.error(searchResult.stderr.trim()); + process.exit(searchResult.status ?? 1); +} + +let payload = null; +try { + payload = JSON.parse(searchResult.stdout || '{}'); +} catch { + console.error('sqlite ann fallback test failed: invalid JSON output'); + process.exit(1); +} + +const hits = payload?.code || []; +if (!hits.length) { + console.error('sqlite ann fallback test failed: no results returned'); + process.exit(1); +} +if (payload?.stats?.annBackend === 'sqlite-extension') { + console.error('sqlite ann fallback test failed: ann backend should not be sqlite-extension'); + process.exit(1); +} +if (payload?.stats?.annExtension?.available?.code) { + console.error('sqlite ann fallback test failed: ann extension should be unavailable'); + process.exit(1); +} + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +console.log('sqlite ann fallback test passed'); diff --git a/tests/sqlite-auto-backend.js b/tests/sqlite-auto-backend.js new file mode 100644 index 000000000..dc34086da --- /dev/null +++ b/tests/sqlite-auto-backend.js @@ -0,0 +1,69 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'sqlite-auto'); +const cacheRoot = path.join(tempRoot, '.cache'); +const searchPath = path.join(root, 'search.js'); +const buildIndexPath = path.join(root, 'build_index.js'); +const buildSqlitePath = path.join(root, 'tools', 'build-sqlite-index.js'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); + +const sampleCode = ` +export function greet(name) { + return "hello " + name; +} +`; +await fsPromises.writeFile(path.join(tempRoot, 'sample.js'), sampleCode); + +const writeConfig = async (threshold) => { + const config = { + sqlite: { use: true }, + search: { sqliteAutoChunkThreshold: threshold, annDefault: false } + }; + await fsPromises.writeFile( + path.join(tempRoot, '.pairofcleats.json'), + JSON.stringify(config, null, 2) + ); +}; + +await writeConfig(1); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const run = (args, label) => { + const result = spawnSync(process.execPath, args, { cwd: tempRoot, env, encoding: 'utf8' }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 
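+    // sqliteAutoChunkThreshold drives backend auto-selection: threshold 1 lets
+    // this tiny index qualify for sqlite, while 999999 should fall back to the
+    // in-memory backend; both outcomes are asserted via payload.backend below.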
1); + } + return result.stdout || ''; +}; + +run([buildIndexPath, '--stub-embeddings', '--repo', tempRoot], 'build index'); +run([buildSqlitePath, '--repo', tempRoot], 'build sqlite'); + +const backendA = JSON.parse(run([searchPath, 'greet', '--json', '--repo', tempRoot], 'search auto sqlite')).backend; +if (backendA !== 'sqlite') { + console.error(`Expected sqlite backend for threshold=1, got ${backendA}`); + process.exit(1); +} + +await writeConfig(999999); +const backendB = JSON.parse(run([searchPath, 'greet', '--json', '--repo', tempRoot], 'search auto memory')).backend; +if (backendB !== 'memory') { + console.error(`Expected memory backend for threshold=999999, got ${backendB}`); + process.exit(1); +} + +console.log('SQLite auto backend test passed'); diff --git a/tests/sqlite-build-delete.js b/tests/sqlite-build-delete.js new file mode 100644 index 000000000..0b8e9fb2f --- /dev/null +++ b/tests/sqlite-build-delete.js @@ -0,0 +1,67 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { deleteDocIds } from '../src/storage/sqlite/build/delete.js'; + +let Database; +try { + ({ default: Database } = await import('better-sqlite3')); +} catch (err) { + console.error('better-sqlite3 is required for sqlite build delete test.'); + process.exit(1); +} + +const db = new Database(':memory:'); +db.exec(` + CREATE TABLE chunks (id INTEGER, mode TEXT); + CREATE TABLE chunks_fts (rowid INTEGER, mode TEXT); + CREATE TABLE token_postings (doc_id INTEGER, mode TEXT); + CREATE TABLE phrase_postings (doc_id INTEGER, mode TEXT); + CREATE TABLE chargram_postings (doc_id INTEGER, mode TEXT); + CREATE TABLE minhash_signatures (doc_id INTEGER, mode TEXT); + CREATE TABLE dense_vectors (doc_id INTEGER, mode TEXT); + CREATE TABLE doc_lengths (doc_id INTEGER, mode TEXT); + CREATE TABLE dense_vectors_ann (id INTEGER PRIMARY KEY, embedding BLOB); +`); + +const insertChunk = db.prepare('INSERT INTO chunks (id, mode) VALUES (?, ?)'); +const insertChunkFts = db.prepare('INSERT INTO chunks_fts (rowid, mode) VALUES (?, ?)'); +const insertDoc = (table) => db.prepare(`INSERT INTO ${table} (doc_id, mode) VALUES (?, ?)`); +const insertAnn = db.prepare('INSERT INTO dense_vectors_ann (id, embedding) VALUES (?, ?)'); + +for (const id of [1, 2]) { + insertChunk.run(id, 'code'); + insertChunkFts.run(id, 'code'); + insertDoc('token_postings').run(id, 'code'); + insertDoc('phrase_postings').run(id, 'code'); + insertDoc('chargram_postings').run(id, 'code'); + insertDoc('minhash_signatures').run(id, 'code'); + insertDoc('dense_vectors').run(id, 'code'); + insertDoc('doc_lengths').run(id, 'code'); + insertAnn.run(id, Buffer.from('x')); +} + +insertChunk.run(1, 'prose'); +insertChunkFts.run(1, 'prose'); +insertDoc('token_postings').run(1, 'prose'); +insertDoc('doc_lengths').run(1, 'prose'); +insertAnn.run(3, Buffer.from('y')); + +deleteDocIds(db, 'code', [1, 2], [{ table: 'dense_vectors_ann', column: 'id', withMode: false }]); + +const remainingCodeChunks = db.prepare('SELECT COUNT(*) AS total FROM chunks WHERE mode = ?').get('code').total; +assert.equal(remainingCodeChunks, 0, 'expected code chunks to be removed'); +const remainingProseChunks = db.prepare('SELECT COUNT(*) AS total FROM chunks WHERE mode = ?').get('prose').total; +assert.equal(remainingProseChunks, 1, 'expected prose chunks to remain'); + +const remainingTokens = db.prepare('SELECT COUNT(*) AS total FROM token_postings WHERE mode = ?').get('code').total; +assert.equal(remainingTokens, 0, 'expected code token postings to be removed'); + +const 
remainingAnn = db.prepare('SELECT COUNT(*) AS total FROM dense_vectors_ann').get().total; +assert.equal(remainingAnn, 1, 'expected ANN rows to be removed for deleted ids'); + +const remainingAnnRow = db.prepare('SELECT id FROM dense_vectors_ann').get(); +assert.equal(remainingAnnRow.id, 3, 'expected ANN row for other ids to remain'); + +db.close(); + +console.log('sqlite build delete test passed'); diff --git a/tests/sqlite-build-indexes.js b/tests/sqlite-build-indexes.js new file mode 100644 index 000000000..d37d6922c --- /dev/null +++ b/tests/sqlite-build-indexes.js @@ -0,0 +1,93 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig, resolveSqlitePaths } from '../tools/dict-utils.js'; + +let Database = null; +try { + ({ default: Database } = await import('better-sqlite3')); +} catch (err) { + console.error(`better-sqlite3 missing: ${err?.message || err}`); + process.exit(1); +} + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'sqlite-build-indexes'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.writeFile(path.join(repoRoot, 'alpha.js'), 'const alpha = 1;\n'); +await fsPromises.writeFile(path.join(repoRoot, 'beta.js'), 'const beta = 2;\n'); +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify({ + indexing: { + treeSitter: { enabled: false }, + artifacts: { + chunkMetaFormat: 'jsonl', + chunkMetaShardSize: 1, + tokenPostingsFormat: 'sharded', + tokenPostingsShardSize: 1 + } + } + }, null, 2) +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const runNode = (label, args) => { + const result = spawnSync(process.execPath, args, { cwd: repoRoot, env, stdio: 'inherit' }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 
1); + } +}; + +runNode('build_index', [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot]); +runNode('build_index_stage4', [path.join(root, 'build_index.js'), '--stub-embeddings', '--stage', 'stage4', '--repo', repoRoot]); + +const previousCacheRoot = process.env.PAIROFCLEATS_CACHE_ROOT; +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +const userConfig = loadUserConfig(repoRoot); +const indexDir = getIndexDir(repoRoot, 'code', userConfig); +const chunkMetaPartsDir = path.join(indexDir, 'chunk_meta.parts'); +const tokenPostingsShardsDir = path.join(indexDir, 'token_postings.shards'); +if (!fs.existsSync(chunkMetaPartsDir)) { + console.error(`Expected chunk_meta.parts to exist at ${chunkMetaPartsDir}`); + process.exit(1); +} +if (!fs.existsSync(tokenPostingsShardsDir)) { + console.error(`Expected token_postings.shards to exist at ${tokenPostingsShardsDir}`); + process.exit(1); +} +const chunkMetaJson = path.join(indexDir, 'chunk_meta.json'); +if (fs.existsSync(chunkMetaJson)) { + console.error(`Expected chunk_meta.json to be absent at ${chunkMetaJson}`); + process.exit(1); +} +const sqlitePaths = resolveSqlitePaths(repoRoot, {}); +if (previousCacheRoot === undefined) { + delete process.env.PAIROFCLEATS_CACHE_ROOT; +} else { + process.env.PAIROFCLEATS_CACHE_ROOT = previousCacheRoot; +} +const db = new Database(sqlitePaths.codePath); +const indexList = db.prepare("PRAGMA index_list('token_postings')").all(); +const indexNames = new Set(indexList.map((row) => row.name)); +if (!indexNames.has('idx_token_postings_token')) { + console.error('Expected idx_token_postings_token to exist'); + process.exit(1); +} +db.close(); + +console.log('sqlite build indexes test passed'); diff --git a/tests/sqlite-build-manifest.js b/tests/sqlite-build-manifest.js new file mode 100644 index 000000000..5000cf185 --- /dev/null +++ b/tests/sqlite-build-manifest.js @@ -0,0 +1,32 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { diffFileManifests, isManifestMatch, normalizeManifestFiles } from '../src/storage/sqlite/build/manifest.js'; + +const manifestFiles = { + 'src/conflict.js': { hash: 'aaa', mtimeMs: 1, size: 10 }, + 'src\\conflict.js': { hash: 'bbb', mtimeMs: 2, size: 20 }, + 'src/unchanged.js': { hash: 'keep', mtimeMs: 5, size: 50 }, + 'src/changed.js': { mtimeMs: 9, size: 90 } +}; + +const normalized = normalizeManifestFiles(manifestFiles); +assert.ok(normalized.conflicts.includes('src/conflict.js'), 'expected conflict to be recorded'); +assert.equal(normalized.entries.length, 3, 'expected normalized entries to dedupe conflicts'); + +const dbFiles = new Map(); +dbFiles.set('src/unchanged.js', { hash: 'keep', mtimeMs: 5, size: 50 }); +dbFiles.set('src/changed.js', { hash: 'old', mtimeMs: 8, size: 90 }); +dbFiles.set('src/deleted.js', { hash: 'gone', mtimeMs: 1, size: 10 }); + +const { changed, deleted } = diffFileManifests(normalized.entries, dbFiles); + +assert.ok(changed.some((record) => record.normalized === 'src/changed.js'), 'expected changed file to be detected'); +assert.ok(!changed.some((record) => record.normalized === 'src/unchanged.js'), 'expected unchanged file to be skipped'); +assert.deepEqual(deleted, ['src/deleted.js'], 'expected deleted file list'); + +const matchByHash = isManifestMatch({ hash: 'abc' }, { hash: 'abc', mtimeMs: 1, size: 1 }); +assert.equal(matchByHash, true, 'expected hash match to win'); +const matchByMeta = isManifestMatch({ mtimeMs: 5, size: 50 }, { mtimeMs: 5, size: 50 }); +assert.equal(matchByMeta, true, 'expected 
mtime+size match'); + +console.log('sqlite build manifest test passed'); diff --git a/tests/sqlite-build-vocab.js b/tests/sqlite-build-vocab.js new file mode 100644 index 000000000..d3f32ef41 --- /dev/null +++ b/tests/sqlite-build-vocab.js @@ -0,0 +1,56 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { ensureVocabIds } from '../src/storage/sqlite/build/vocab.js'; + +let Database; +try { + ({ default: Database } = await import('better-sqlite3')); +} catch (err) { + console.error('better-sqlite3 is required for sqlite build vocab test.'); + process.exit(1); +} + +const db = new Database(':memory:'); +db.exec('CREATE TABLE token_vocab (mode TEXT, token_id INTEGER, token TEXT, PRIMARY KEY (mode, token_id))'); + +const insertSeed = db.prepare('INSERT INTO token_vocab (mode, token_id, token) VALUES (?, ?, ?)'); +insertSeed.run('code', 0, 'alpha'); +insertSeed.run('code', 1, 'beta'); +insertSeed.run('prose', 0, 'beta'); + +const insertStmt = db.prepare('INSERT OR REPLACE INTO token_vocab (mode, token_id, token) VALUES (?, ?, ?)'); + +let result = ensureVocabIds( + db, + 'code', + 'token_vocab', + 'token_id', + 'token', + ['beta', 'gamma', 'beta'], + insertStmt +); +assert.equal(result.inserted, 1, 'expected one new token'); +assert.equal(result.map.get('beta'), 1, 'expected existing token id'); +assert.equal(result.map.get('gamma'), 2, 'expected new token id'); + +const rowCount = db.prepare('SELECT COUNT(*) AS total FROM token_vocab WHERE mode = ?').get('code').total; +assert.equal(rowCount, 3, 'expected vocab size to grow by one'); + +const beforeCount = rowCount; +result = ensureVocabIds( + db, + 'code', + 'token_vocab', + 'token_id', + 'token', + ['delta', 'epsilon'], + insertStmt, + { limits: { ratio: 0.4, absolute: 1 } } +); +assert.equal(result.skip, true, 'expected vocab growth to be skipped'); +const afterCount = db.prepare('SELECT COUNT(*) AS total FROM token_vocab WHERE mode = ?').get('code').total; +assert.equal(afterCount, beforeCount, 'expected vocab size to remain unchanged'); + +db.close(); + +console.log('sqlite build vocab test passed'); diff --git a/tests/sqlite-bundle-missing.js b/tests/sqlite-bundle-missing.js new file mode 100644 index 000000000..874c9871a --- /dev/null +++ b/tests/sqlite-bundle-missing.js @@ -0,0 +1,120 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getRepoCacheRoot, loadUserConfig, resolveSqlitePaths } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const tempRoot = path.join(root, 'tests', '.cache', 'sqlite-bundle-missing'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); +await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + +const run = (args, label, options = {}) => { + const result = spawnSync(process.execPath, args, { + cwd: repoRoot, + env, + encoding: 'utf8', + ...options + }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + if (result.stderr) console.error(result.stderr.trim()); + 
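+ // Surface the child's stderr before propagating its exit code.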
process.exit(result.status ?? 1); + } + return result; +}; + +run([ + path.join(root, 'build_index.js'), + '--incremental', + '--stub-embeddings', + '--mode', + 'code', + '--repo', + repoRoot +], 'build index'); + +const userConfig = loadUserConfig(repoRoot); +const repoCacheRoot = getRepoCacheRoot(repoRoot, userConfig); +const manifestPath = path.join(repoCacheRoot, 'incremental', 'code', 'manifest.json'); +const bundleDir = path.join(repoCacheRoot, 'incremental', 'code', 'files'); +if (!fs.existsSync(manifestPath)) { + console.error('Missing incremental manifest for sqlite bundle test.'); + process.exit(1); +} +const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8')); +const manifestFiles = Object.values(manifest.files || {}); +if (!manifestFiles.length) { + console.error('Incremental manifest contains no files.'); + process.exit(1); +} +const bundleName = manifestFiles[0]?.bundle; +if (!bundleName) { + console.error('Manifest entry missing bundle name.'); + process.exit(1); +} +const bundlePath = path.join(bundleDir, bundleName); +if (!fs.existsSync(bundlePath)) { + console.error(`Expected bundle file missing: ${bundlePath}`); + process.exit(1); +} +await fsPromises.rm(bundlePath, { force: true }); + +const sqliteBuild = spawnSync( + process.execPath, + [ + path.join(root, 'tools', 'build-sqlite-index.js'), + '--mode', + 'code', + '--repo', + repoRoot + ], + { cwd: repoRoot, env, encoding: 'utf8' } +); +if (sqliteBuild.status !== 0) { + console.error('build-sqlite-index failed for missing bundle test.'); + if (sqliteBuild.stderr) console.error(sqliteBuild.stderr.trim()); + process.exit(sqliteBuild.status ?? 1); +} +const output = `${sqliteBuild.stdout || ''}\n${sqliteBuild.stderr || ''}`; +if (!output.includes('falling back to file-backed artifacts')) { + console.error('Expected bundle fallback warning not found in output.'); + process.exit(1); +} + +const sqlitePaths = resolveSqlitePaths(repoRoot, userConfig); +if (!fs.existsSync(sqlitePaths.codePath)) { + console.error(`Missing sqlite db after fallback: ${sqlitePaths.codePath}`); + process.exit(1); +} + +let Database; +try { + ({ default: Database } = await import('better-sqlite3')); +} catch { + console.error('better-sqlite3 is required for sqlite bundle test.'); + process.exit(1); +} +const db = new Database(sqlitePaths.codePath, { readonly: true }); +const row = db.prepare('SELECT COUNT(*) AS total FROM chunks WHERE mode = ?').get('code'); +db.close(); +if (!Number(row?.total)) { + console.error('Expected sqlite index to contain chunks after fallback rebuild.'); + process.exit(1); +} + +console.log('sqlite bundle missing fallback test passed'); diff --git a/tests/sqlite-cache.js b/tests/sqlite-cache.js new file mode 100644 index 000000000..172961b47 --- /dev/null +++ b/tests/sqlite-cache.js @@ -0,0 +1,25 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fs from 'node:fs/promises'; +import path from 'node:path'; +import os from 'node:os'; +import { createSqliteDbCache } from '../src/retrieval/sqlite-cache.js'; + +const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'pairofcleats-sqlite-cache-')); +const dbPath = path.join(tempRoot, 'index.db'); +await fs.writeFile(dbPath, 'initial'); + +const cache = createSqliteDbCache(); +let closed = false; +const db = { close: () => { closed = true; } }; +cache.set(dbPath, db); + +const first = cache.get(dbPath); +assert.equal(first, db, 'should return cached db'); + +await fs.writeFile(dbPath, 'changed'); +const second = cache.get(dbPath); 
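+// The writeFile above changes the db file's signature (assumed to be derived from mtime/size), so this get() should evict and close the stale handle.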
+assert.equal(second, null, 'should invalidate on signature change'); +assert.equal(closed, true, 'should close invalidated db'); + +console.log('sqlite cache tests passed'); diff --git a/tests/sqlite-chunk-id.js b/tests/sqlite-chunk-id.js new file mode 100644 index 000000000..d5786c405 --- /dev/null +++ b/tests/sqlite-chunk-id.js @@ -0,0 +1,16 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { buildChunkRow } from '../src/storage/sqlite/build-helpers.js'; +import { CREATE_TABLES_BASE_SQL } from '../src/storage/sqlite/schema.js'; + +const chunk = { + file: 'src/example.js', + start: 0, + end: 12, + metaV2: { chunkId: 'chunk_sqlite_1' } +}; +const row = buildChunkRow(chunk, 'code', 0); +assert.equal(row.chunk_id, 'chunk_sqlite_1', 'expected chunk_id in sqlite row'); +assert.ok(CREATE_TABLES_BASE_SQL.includes('chunk_id'), 'expected chunk_id column in sqlite schema'); + +console.log('sqlite chunk id test passed'); diff --git a/tests/sqlite-incremental-no-change.js b/tests/sqlite-incremental-no-change.js new file mode 100644 index 000000000..f67350fb9 --- /dev/null +++ b/tests/sqlite-incremental-no-change.js @@ -0,0 +1,144 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { loadUserConfig, resolveSqlitePaths } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const tempRoot = path.join(root, 'tests', '.cache', 'sqlite-incremental-no-change'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +const stripMaxOldSpaceFlag = (options) => { + if (!options) return ''; + return options + .replace(/--max-old-space-size=\d+/g, '') + .replace(/--max-old-space-size\s+\d+/g, '') + .replace(/\s+/g, ' ') + .trim(); +}; + +const nodeOptions = stripMaxOldSpaceFlag(process.env.NODE_OPTIONS || ''); + +const rmWithRetries = async (target, { retries = 8, delayMs = 150 } = {}) => { + for (let attempt = 0; attempt <= retries; attempt += 1) { + try { + await fsPromises.rm(target, { recursive: true, force: true }); + return; + } catch (err) { + if (!err || attempt >= retries) throw err; + if (!['EBUSY', 'EPERM', 'ENOTEMPTY'].includes(err.code)) throw err; + await new Promise((resolve) => setTimeout(resolve, delayMs * (attempt + 1))); + } + } +}; + +await rmWithRetries(tempRoot); +await fsPromises.mkdir(tempRoot, { recursive: true }); +await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub', + PAIROFCLEATS_WORKER_POOL: 'off', + PAIROFCLEATS_MAX_OLD_SPACE_MB: '4096' +}; +if (nodeOptions) { + env.NODE_OPTIONS = nodeOptions; +} else { + delete env.NODE_OPTIONS; +} +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; +process.env.PAIROFCLEATS_WORKER_POOL = 'off'; +process.env.PAIROFCLEATS_MAX_OLD_SPACE_MB = '4096'; + +function run(args, label) { + const result = spawnSync(process.execPath, args, { + cwd: repoRoot, + env, + stdio: 'inherit' + }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 
1); + } +} + +function runCapture(args, label) { + const result = spawnSync(process.execPath, args, { + cwd: repoRoot, + env, + encoding: 'utf8' + }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 1); + } + return result; +} + +run([path.join(root, 'build_index.js'), '--incremental', '--stub-embeddings', '--repo', repoRoot], 'build index'); +const initialSqlite = runCapture( + [path.join(root, 'tools', 'build-sqlite-index.js'), '--repo', repoRoot], + 'build sqlite index' +); +const initialOutput = `${initialSqlite.stdout || ''}\n${initialSqlite.stderr || ''}`; +if (!initialOutput.includes('Validation (smoke) ok for code')) { + console.error('Expected sqlite smoke validation for code build.'); + process.exit(1); +} + +const userConfig = loadUserConfig(repoRoot); +let sqlitePaths = resolveSqlitePaths(repoRoot, userConfig); + +let Database; +try { + ({ default: Database } = await import('better-sqlite3')); +} catch (err) { + console.error('better-sqlite3 is required for sqlite incremental no-change test.'); + process.exit(1); +} + +const dbBefore = new Database(sqlitePaths.codePath, { readonly: true }); +const beforeCounts = { + chunks: dbBefore.prepare('SELECT COUNT(*) AS total FROM chunks WHERE mode = ?').get('code').total, + files: dbBefore.prepare('SELECT COUNT(*) AS total FROM file_manifest WHERE mode = ?').get('code').total, + hash: (dbBefore.prepare('SELECT hash FROM file_manifest WHERE mode = ? AND file = ?') + .get('code', 'src/index.js') || {}).hash || null +}; +dbBefore.close(); + +const noChangeResult = runCapture( + [path.join(root, 'tools', 'build-sqlite-index.js'), '--incremental', '--repo', repoRoot], + 'build sqlite index (no change)' +); +const noChangeOutput = `${noChangeResult.stdout || ''}\n${noChangeResult.stderr || ''}`; +if (!noChangeOutput.includes('SQLite indexes updated')) { + console.error('Expected incremental sqlite update output for no-change run.'); + process.exit(1); +} +if (noChangeOutput.includes('rebuilding full index')) { + console.error('Expected no full rebuild for no-change run.'); + process.exit(1); +} + +sqlitePaths = resolveSqlitePaths(repoRoot, userConfig); +const dbAfter = new Database(sqlitePaths.codePath, { readonly: true }); +const afterCounts = { + chunks: dbAfter.prepare('SELECT COUNT(*) AS total FROM chunks WHERE mode = ?').get('code').total, + files: dbAfter.prepare('SELECT COUNT(*) AS total FROM file_manifest WHERE mode = ?').get('code').total, + hash: (dbAfter.prepare('SELECT hash FROM file_manifest WHERE mode = ? 
AND file = ?') + .get('code', 'src/index.js') || {}).hash || null +}; +dbAfter.close(); + +assert.equal(afterCounts.chunks, beforeCounts.chunks, 'expected chunk counts to remain stable'); +assert.equal(afterCounts.files, beforeCounts.files, 'expected file manifest counts to remain stable'); +assert.equal(afterCounts.hash, beforeCounts.hash, 'expected file manifest hash to remain stable'); + +console.log('sqlite incremental no-change test passed'); diff --git a/tests/sqlite-incremental.js b/tests/sqlite-incremental.js index ad345e018..b1c62bd69 100644 --- a/tests/sqlite-incremental.js +++ b/tests/sqlite-incremental.js @@ -3,8 +3,8 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; import { spawnSync } from 'node:child_process'; -import { loadUserConfig, resolveSqlitePaths } from '../tools/dict-utils.js'; -import { SCHEMA_VERSION } from '../src/sqlite/schema.js'; +import { getRepoCacheRoot, loadUserConfig, resolveSqlitePaths } from '../tools/dict-utils.js'; +import { SCHEMA_VERSION } from '../src/storage/sqlite/schema.js'; const root = process.cwd(); const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); @@ -12,17 +12,50 @@ const tempRoot = path.join(root, 'tests', '.cache', 'sqlite-incremental'); const repoRoot = path.join(tempRoot, 'repo'); const cacheRoot = path.join(tempRoot, 'cache'); -await fsPromises.rm(tempRoot, { recursive: true, force: true }); +const stripMaxOldSpaceFlag = (options) => { + if (!options) return ''; + return options + .replace(/--max-old-space-size=\d+/g, '') + .replace(/--max-old-space-size\s+\d+/g, '') + .replace(/\s+/g, ' ') + .trim(); +}; + +const nodeOptions = stripMaxOldSpaceFlag(process.env.NODE_OPTIONS || ''); + +const rmWithRetries = async (target, { retries = 8, delayMs = 150 } = {}) => { + for (let attempt = 0; attempt <= retries; attempt += 1) { + try { + await fsPromises.rm(target, { recursive: true, force: true }); + return; + } catch (err) { + if (!err || attempt >= retries) throw err; + if (!['EBUSY', 'EPERM', 'ENOTEMPTY'].includes(err.code)) throw err; + await new Promise((resolve) => setTimeout(resolve, delayMs * (attempt + 1))); + } + } +}; + +await rmWithRetries(tempRoot); await fsPromises.mkdir(tempRoot, { recursive: true }); await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true }); const env = { ...process.env, PAIROFCLEATS_CACHE_ROOT: cacheRoot, - PAIROFCLEATS_EMBEDDINGS: 'stub' + PAIROFCLEATS_EMBEDDINGS: 'stub', + PAIROFCLEATS_WORKER_POOL: 'off', + PAIROFCLEATS_MAX_OLD_SPACE_MB: '8192' }; +if (nodeOptions) { + env.NODE_OPTIONS = nodeOptions; +} else { + delete env.NODE_OPTIONS; +} process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; +process.env.PAIROFCLEATS_WORKER_POOL = 'off'; +process.env.PAIROFCLEATS_MAX_OLD_SPACE_MB = '8192'; function run(args, label) { const result = spawnSync(process.execPath, args, { @@ -51,10 +84,22 @@ function runCapture(args, label) { } run([path.join(root, 'build_index.js'), '--incremental', '--stub-embeddings', '--repo', repoRoot], 'build index'); -run([path.join(root, 'tools', 'build-sqlite-index.js'), '--repo', repoRoot], 'build sqlite index'); +const initialSqlite = runCapture( + [path.join(root, 'tools', 'build-sqlite-index.js'), '--repo', repoRoot], + 'build sqlite index' +); +const initialOutput = `${initialSqlite.stdout || ''}\n${initialSqlite.stderr || ''}`; +if (!initialOutput.includes('Validation (smoke) ok for code')) { + console.error('Expected sqlite smoke validation for code build.'); + 
process.exit(1); +} +if (!initialOutput.includes('Validation (smoke) ok for prose')) { + console.error('Expected sqlite smoke validation for prose build.'); + process.exit(1); +} const userConfig = loadUserConfig(repoRoot); -const sqlitePaths = resolveSqlitePaths(repoRoot, userConfig); +let sqlitePaths = resolveSqlitePaths(repoRoot, userConfig); let Database; try { @@ -82,6 +127,7 @@ await fsPromises.writeFile(targetFile, updated); run([path.join(root, 'build_index.js'), '--incremental', '--stub-embeddings', '--repo', repoRoot], 'build index (incremental)'); run([path.join(root, 'tools', 'build-sqlite-index.js'), '--incremental', '--repo', repoRoot], 'build sqlite index (incremental)'); +sqlitePaths = resolveSqlitePaths(repoRoot, userConfig); const dbAfter = new Database(sqlitePaths.codePath, { readonly: true }); const afterRow = dbAfter .prepare('SELECT hash, chunk_count FROM file_manifest WHERE mode = ? AND file = ?') @@ -116,6 +162,33 @@ if (!payload.code?.length && !payload.prose?.length) { process.exit(1); } +const repoCacheRoot = getRepoCacheRoot(repoRoot, userConfig); +const manifestPath = path.join(repoCacheRoot, 'incremental', 'code', 'manifest.json'); +let manifest = null; +try { + manifest = JSON.parse(await fsPromises.readFile(manifestPath, 'utf8')); +} catch { + console.error('Failed to load incremental manifest for normalization test.'); + process.exit(1); +} +if (!manifest?.files?.['src/index.js']) { + console.error('Expected manifest entry for src/index.js.'); + process.exit(1); +} +manifest.files['src\\index.js'] = manifest.files['src/index.js']; +delete manifest.files['src/index.js']; +await fsPromises.writeFile(manifestPath, JSON.stringify(manifest, null, 2)); + +const normalizedResult = runCapture( + [path.join(root, 'tools', 'build-sqlite-index.js'), '--incremental', '--repo', repoRoot], + 'build sqlite index (normalized manifest)' +); +const normalizedOutput = `${normalizedResult.stdout || ''}\n${normalizedResult.stderr || ''}`; +if (!normalizedOutput.includes('SQLite indexes updated')) { + console.error('Expected incremental sqlite update with normalized manifest.'); + process.exit(1); +} + const downgradeVersion = Math.max(0, SCHEMA_VERSION - 1); const dbDowngrade = new Database(sqlitePaths.codePath); dbDowngrade.pragma(`user_version = ${downgradeVersion}`); diff --git a/tests/sqlite-index-state-fail-closed.js b/tests/sqlite-index-state-fail-closed.js new file mode 100644 index 000000000..2e359ee2a --- /dev/null +++ b/tests/sqlite-index-state-fail-closed.js @@ -0,0 +1,90 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, getRepoCacheRoot, loadUserConfig, resolveIndexRoot } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const tempRoot = path.join(root, 'tests', '.cache', 'sqlite-index-state-fail'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); +await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + +const run = (args, label) => { + const result = 
spawnSync(process.execPath, args, { cwd: repoRoot, env, stdio: 'inherit' }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 1); + } +}; + +run([ + path.join(root, 'build_index.js'), + '--stub-embeddings', + '--mode', + 'code', + '--repo', + repoRoot +], 'build index'); + +const userConfig = loadUserConfig(repoRoot); +const indexRoot = resolveIndexRoot(repoRoot, userConfig); +const codeDir = getIndexDir(repoRoot, 'code', userConfig, { indexRoot }); +const repoCacheRoot = getRepoCacheRoot(repoRoot, userConfig); +const statePath = path.join(codeDir, 'index_state.json'); +if (!fs.existsSync(statePath)) { + console.error('Expected index_state.json after initial build.'); + process.exit(1); +} + +const chunkMetaJson = path.join(codeDir, 'chunk_meta.json'); +const chunkMetaJsonl = path.join(codeDir, 'chunk_meta.jsonl'); +const chunkMetaMeta = path.join(codeDir, 'chunk_meta.meta.json'); +const chunkMetaParts = path.join(codeDir, 'chunk_meta.parts'); +await fsPromises.rm(chunkMetaJson, { force: true }); +await fsPromises.rm(chunkMetaJsonl, { force: true }); +await fsPromises.rm(chunkMetaMeta, { force: true }); +await fsPromises.rm(chunkMetaParts, { recursive: true, force: true }); +const manifestPath = path.join(repoCacheRoot, 'incremental', 'code', 'manifest.json'); +await fsPromises.rm(manifestPath, { force: true }); + +const sqliteBuild = spawnSync( + process.execPath, + [ + path.join(root, 'tools', 'build-sqlite-index.js'), + '--mode', + 'code', + '--repo', + repoRoot + ], + { cwd: repoRoot, env, encoding: 'utf8' } +); +if (sqliteBuild.status === 0) { + console.error('Expected build-sqlite-index to fail with missing artifacts.'); + process.exit(1); +} + +const state = JSON.parse(fs.readFileSync(statePath, 'utf8')); +if (!state?.sqlite) { + console.error('index_state.json missing sqlite section after failure.'); + process.exit(1); +} +if (state.sqlite.pending !== true || state.sqlite.ready !== false) { + console.error(`Expected sqlite pending=true and ready=false, got pending=${state.sqlite.pending} ready=${state.sqlite.ready}`); + process.exit(1); +} + +console.log('sqlite index state fail-closed test passed'); diff --git a/tests/sqlite-missing-dep.js b/tests/sqlite-missing-dep.js new file mode 100644 index 000000000..a4c63af31 --- /dev/null +++ b/tests/sqlite-missing-dep.js @@ -0,0 +1,92 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'sqlite-missing-dep'); +const cacheRoot = path.join(tempRoot, '.cache'); +const searchPath = path.join(root, 'search.js'); +const buildIndexPath = path.join(root, 'build_index.js'); +const buildSqlitePath = path.join(root, 'tools', 'build-sqlite-index.js'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); + +const sampleCode = ` +export function greet(name) { + return "hello " + name; +} +`; +await fsPromises.writeFile(path.join(tempRoot, 'sample.js'), sampleCode); + +const config = { + sqlite: { use: true }, + search: { sqliteAutoChunkThreshold: 1, annDefault: false } +}; +await fsPromises.writeFile( + path.join(tempRoot, '.pairofcleats.json'), + JSON.stringify(config, null, 2) +); + +const envBase = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const run = (args, label, envOverride = {}) => { + const 
result = spawnSync(process.execPath, args, { + cwd: tempRoot, + env: { ...envBase, ...envOverride }, + encoding: 'utf8' + }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 1); + } + return result.stdout || ''; +}; + +run([buildIndexPath, '--stub-embeddings', '--repo', tempRoot], 'build index'); +run([buildSqlitePath, '--repo', tempRoot], 'build sqlite'); + +const autoOutput = run( + [searchPath, 'greet', '--json', '--repo', tempRoot], + 'search auto with sqlite disabled', + { PAIROFCLEATS_SQLITE_DISABLED: '1' } +); +let autoBackend = null; +try { + autoBackend = JSON.parse(autoOutput).backend; +} catch { + console.error('Failed to parse JSON output for auto sqlite fallback.'); + process.exit(1); +} +if (autoBackend !== 'memory') { + console.error(`Expected memory backend with sqlite disabled, got ${autoBackend}`); + process.exit(1); +} + +const forcedResult = spawnSync( + process.execPath, + [searchPath, 'greet', '--json', '--backend', 'sqlite', '--repo', tempRoot], + { + cwd: tempRoot, + env: { ...envBase, PAIROFCLEATS_SQLITE_DISABLED: '1' }, + encoding: 'utf8' + } +); +if (forcedResult.status === 0) { + console.error('Expected forced sqlite search to fail when sqlite is disabled.'); + process.exit(1); +} +const forcedStderr = forcedResult.stderr || ''; +if (!forcedStderr.includes('better-sqlite3 is required')) { + console.error('Expected missing dependency message for forced sqlite backend.'); + if (forcedStderr) console.error(forcedStderr.trim()); + process.exit(1); +} + +console.log('SQLite missing dependency test passed'); diff --git a/tests/sqlite-sidecar-cleanup.js b/tests/sqlite-sidecar-cleanup.js new file mode 100644 index 000000000..1ba5b8780 --- /dev/null +++ b/tests/sqlite-sidecar-cleanup.js @@ -0,0 +1,57 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { loadUserConfig, resolveSqlitePaths } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const tempRoot = path.join(root, 'tests', '.cache', 'sqlite-sidecar-cleanup'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(tempRoot, { recursive: true }); +await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + +const run = (args, label) => { + const result = spawnSync(process.execPath, args, { cwd: repoRoot, env, stdio: 'inherit' }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 
1); + } +}; + +run([path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], 'build index'); +run([path.join(root, 'tools', 'build-sqlite-index.js'), '--mode', 'code', '--repo', repoRoot], 'build sqlite'); + +const userConfig = loadUserConfig(repoRoot); +const sqlitePaths = resolveSqlitePaths(repoRoot, userConfig); +const walPath = `${sqlitePaths.codePath}-wal`; +const shmPath = `${sqlitePaths.codePath}-shm`; +await fsPromises.writeFile(walPath, 'stale-wal'); +await fsPromises.writeFile(shmPath, 'stale-shm'); + +run([path.join(root, 'tools', 'build-sqlite-index.js'), '--mode', 'code', '--repo', repoRoot], 'rebuild sqlite'); + +const staleWal = fs.existsSync(walPath) ? fs.readFileSync(walPath) : null; +const staleShm = fs.existsSync(shmPath) ? fs.readFileSync(shmPath) : null; +if (staleWal && staleWal.toString('utf8') === 'stale-wal') { + console.error('Stale WAL sidecar was not cleaned up.'); + process.exit(1); +} +if (staleShm && staleShm.toString('utf8') === 'stale-shm') { + console.error('Stale SHM sidecar was not cleaned up.'); + process.exit(1); +} + +console.log('sqlite sidecar cleanup test passed'); diff --git a/tests/sqlite-vec-candidate-set.js b/tests/sqlite-vec-candidate-set.js new file mode 100644 index 000000000..073270638 --- /dev/null +++ b/tests/sqlite-vec-candidate-set.js @@ -0,0 +1,53 @@ +#!/usr/bin/env node +import assert from 'node:assert'; +import { queryVectorAnn } from '../tools/vector-extension.js'; + +const config = { + enabled: true, + table: 'dense_vectors_ann', + column: 'embedding', + encoding: 'float32' +}; + +let currentRows = []; +let lastSql = null; +let lastParams = null; + +const db = { + prepare: (sql) => { + lastSql = sql; + return { + all: (...params) => { + lastParams = params; + return currentRows; + } + }; + } +}; + +currentRows = [ + { rowid: 2, distance: 0.5 }, + { rowid: 3, distance: 0.1 }, + { rowid: 1, distance: 0.1 } +]; +const smallCandidates = new Set([1, 2, 3]); +const smallHits = queryVectorAnn(db, config, [0, 1], 2, smallCandidates); +assert.ok(lastSql.includes('rowid IN'), 'expected candidate pushdown for small set'); +assert.ok(lastSql.includes('ORDER BY distance'), 'expected distance ordering'); +assert.equal(smallHits[0].idx, 1, 'expected rowid tie-break on distance'); +assert.equal(smallHits[1].idx, 3, 'expected rowid tie-break on distance'); + +const largeCandidates = new Set(Array.from({ length: 901 }, (_, i) => i)); +currentRows = [ + { rowid: 2000, distance: 0.05 }, + { rowid: 10, distance: 0.1 } +]; +lastSql = null; +lastParams = null; +const largeHits = queryVectorAnn(db, config, [0, 1], 2, largeCandidates); +assert.ok(!lastSql.includes('rowid IN'), 'expected fallback query for large set'); +assert.equal(largeHits.length, 1, 'expected candidate filtering for large set'); +assert.equal(largeHits[0].idx, 10, 'expected candidate filtering for large set'); +assert.ok(Array.isArray(lastParams), 'expected SQL parameters for ANN query'); + +console.log('sqlite vec candidate set test passed'); diff --git a/tests/structural-filters.js b/tests/structural-filters.js new file mode 100644 index 000000000..962c2a4e5 --- /dev/null +++ b/tests/structural-filters.js @@ -0,0 +1,76 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, getRepoCacheRoot, loadUserConfig } from '../tools/dict-utils.js'; +import { loadChunkMeta, readJsonFile } from 
'../src/shared/artifact-io.js'; +import { filterChunks } from '../src/retrieval/output.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'structural-filters'); +const repoRoot = path.join(tempRoot, 'repo'); +const srcDir = path.join(repoRoot, 'src'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(srcDir, { recursive: true }); +await fsPromises.writeFile(path.join(srcDir, 'example.js'), 'eval("x");\n', 'utf8'); + +const userConfig = loadUserConfig(repoRoot); +const cacheRoot = getRepoCacheRoot(repoRoot, userConfig); +const structuralDir = path.join(cacheRoot, 'structural'); +await fsPromises.mkdir(structuralDir, { recursive: true }); +const match = { + engine: 'semgrep', + pack: 'test-pack', + ruleId: 'no-eval', + tags: ['security'], + path: 'src/example.js', + startLine: 1, + endLine: 1, + snippet: 'eval("x")' +}; +await fsPromises.writeFile( + path.join(structuralDir, 'structural.jsonl'), + `${JSON.stringify(match)}\n`, + 'utf8' +); + +const buildResult = spawnSync(process.execPath, [ + path.join(root, 'build_index.js'), + '--stub-embeddings', + '--repo', + repoRoot +], { encoding: 'utf8' }); +if (buildResult.status !== 0) { + console.error(buildResult.stderr || buildResult.stdout || 'build_index failed'); + process.exit(buildResult.status ?? 1); +} + +const indexDir = getIndexDir(repoRoot, 'code', userConfig); +const chunkMeta = loadChunkMeta(indexDir); +const fileMeta = readJsonFile(path.join(indexDir, 'file_meta.json')); +const fileMetaById = new Map( + Array.isArray(fileMeta) ? fileMeta.map((entry) => [entry.id, entry]) : [] +); +for (const chunk of chunkMeta) { + if (!chunk || chunk.file || chunk.fileId == null) continue; + const meta = fileMetaById.get(chunk.fileId); + if (meta?.file) chunk.file = meta.file; +} +const target = chunkMeta.find((chunk) => chunk.file === 'src/example.js'); +assert.ok(target, 'expected example.js chunk to exist'); +assert.ok(Array.isArray(target.docmeta?.structural), 'expected structural metadata on chunk'); +assert.equal(target.docmeta.structural[0]?.pack, 'test-pack'); +assert.equal(target.docmeta.structural[0]?.ruleId, 'no-eval'); + +const packFiltered = filterChunks(chunkMeta, { structPack: 'test-pack' }); +assert.ok(packFiltered.find((chunk) => chunk.file === 'src/example.js'), 'expected struct-pack filter to match'); + +const ruleFiltered = filterChunks(chunkMeta, { structRule: 'no-eval' }); +assert.ok(ruleFiltered.find((chunk) => chunk.file === 'src/example.js'), 'expected struct-rule filter to match'); + +const tagFiltered = filterChunks(chunkMeta, { structTag: 'security' }); +assert.ok(tagFiltered.find((chunk) => chunk.file === 'src/example.js'), 'expected struct-tag filter to match'); + +console.log('structural filters test passed'); diff --git a/tests/structural-search.js b/tests/structural-search.js new file mode 100644 index 000000000..7b89d35d0 --- /dev/null +++ b/tests/structural-search.js @@ -0,0 +1,64 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'structural-search'); +const repoRoot = path.join(tempRoot, 'repo'); +const srcDir = path.join(repoRoot, 'src'); +const docsDir = path.join(repoRoot, 'docs'); +const binRoot = path.join(root, 'tests', 'fixtures', 'structural', 'bin'); + +await 
fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(srcDir, { recursive: true }); +await fsPromises.mkdir(docsDir, { recursive: true }); +await fsPromises.writeFile(path.join(srcDir, 'example.js'), 'eval(\"x\");\n'); +await fsPromises.writeFile(path.join(srcDir, 'example.ts'), 'eval(x);\n'); +await fsPromises.writeFile(path.join(docsDir, 'notes.md'), 'TODO: update\n'); + +for (const binName of ['semgrep', 'sg', 'comby']) { + try { + await fsPromises.chmod(path.join(binRoot, binName), 0o755); + } catch {} +} + +const env = { + ...process.env, + PATH: `${binRoot}${path.delimiter}${process.env.PATH || ''}`, + PAIROFCLEATS_PROFILE: 'full' +}; + +const result = spawnSync( + process.execPath, + [ + path.join(root, 'tools', 'structural-search.js'), + '--repo', repoRoot, + '--pack', 'semgrep-security', + '--pack', 'astgrep-js-safety', + '--pack', 'comby-docs', + '--format', 'json' + ], + { encoding: 'utf8', env } +); + +if (result.status !== 0) { + console.error(result.stderr || result.stdout || 'structural-search failed'); + process.exit(result.status ?? 1); +} + +const payload = JSON.parse(result.stdout || '{}'); +assert.ok(Array.isArray(payload.results), 'expected results array'); +assert.ok(payload.results.length >= 3, 'expected at least 3 results'); + +const engines = new Set(payload.results.map((entry) => entry.engine)); +assert.ok(engines.has('semgrep'), 'expected semgrep result'); +assert.ok(engines.has('ast-grep'), 'expected ast-grep result'); +assert.ok(engines.has('comby'), 'expected comby result'); + +const comby = payload.results.find((entry) => entry.engine === 'comby'); +assert.equal(comby.path, 'docs/notes.md'); + +console.log('structural search test passed'); diff --git a/tests/sublime-pycompile.js b/tests/sublime-pycompile.js new file mode 100644 index 000000000..fca759990 --- /dev/null +++ b/tests/sublime-pycompile.js @@ -0,0 +1,48 @@ +#!/usr/bin/env node +import { spawnSync } from 'node:child_process'; +import fs from 'node:fs'; +import path from 'node:path'; + +const root = process.cwd(); +const pkgDir = path.join(root, 'sublime', 'PairOfCleats'); + +const collectPyFiles = (dir) => { + const out = []; + const stack = [dir]; + while (stack.length) { + const current = stack.pop(); + const entries = fs.readdirSync(current, { withFileTypes: true }); + for (const entry of entries) { + const full = path.join(current, entry.name); + if (entry.isDirectory()) { + stack.push(full); + } else if (entry.isFile() && entry.name.endsWith('.py')) { + out.push(full); + } + } + } + out.sort(); + return out; +}; + +const pyFiles = collectPyFiles(pkgDir); +if (!pyFiles.length) { + console.error('sublime-pycompile: no python files found under', pkgDir); + process.exit(1); +} + +const python = process.env.PYTHON || 'python'; +const result = spawnSync( + python, + ['-m', 'py_compile', ...pyFiles], + { encoding: 'utf8' } +); + +if (result.status !== 0) { + console.error('sublime-pycompile: python -m py_compile failed'); + if (result.stdout) console.error(result.stdout); + if (result.stderr) console.error(result.stderr); + process.exit(result.status || 1); +} + +console.log(`sublime-pycompile: ok (compiled ${pyFiles.length} files)`); diff --git a/tests/sublime/test_api_client.py b/tests/sublime/test_api_client.py new file mode 100644 index 000000000..58a46923a --- /dev/null +++ b/tests/sublime/test_api_client.py @@ -0,0 +1,139 @@ +import importlib +import json +import os +import sys +import tempfile +import threading +import unittest +from http.server import 
BaseHTTPRequestHandler +from socketserver import TCPServer + +REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) +PACKAGE_ROOT = os.path.join(REPO_ROOT, 'sublime') +if PACKAGE_ROOT not in sys.path: + sys.path.insert(0, PACKAGE_ROOT) + +api_client = importlib.import_module('PairOfCleats.lib.api_client') + + +class _Handler(BaseHTTPRequestHandler): + def do_GET(self): + path = self.path.split('?', 1)[0] + query = {} + if '?' in self.path: + try: + from urllib.parse import parse_qs + query = {k: v[0] for k, v in parse_qs(self.path.split('?', 1)[1]).items()} + except Exception: + query = {} + + if path == '/map': + fmt = query.get('format') or 'json' + self.send_response(200) + self.send_header('Access-Control-Allow-Origin', '*') + self.send_header('X-PairofCleats-Map-CacheKey', 'test-cache-key') + if fmt == 'json': + payload = { + 'root': {'path': query.get('repo') or '/repo', 'id': 'repo-id'}, + 'summary': {'counts': {'files': 1, 'members': 1, 'edges': 0}}, + 'warnings': [] + } + body = json.dumps(payload).encode('utf-8') + self.send_header('Content-Type', 'application/json') + self.send_header('Content-Length', str(len(body))) + self.end_headers() + self.wfile.write(body) + return + + if fmt == 'dot': + body = b'digraph G {}\n' + self.send_header('Content-Type', 'text/plain') + self.send_header('Content-Length', str(len(body))) + self.end_headers() + self.wfile.write(body) + return + + body = b'' + self.send_header('Content-Type', 'text/html') + self.send_header('Content-Length', str(len(body))) + self.end_headers() + self.wfile.write(body) + return + + if path == '/map/nodes': + self.send_response(200) + self.send_header('Access-Control-Allow-Origin', '*') + payload = { + 'generatedAt': 'now', + 'root': query.get('repo') or '/repo', + 'nodes': [{'id': 'n1', 'label': 'node 1', 'file': 'src/a.js'}] + } + body = json.dumps(payload).encode('utf-8') + self.send_header('Content-Type', 'application/json') + self.send_header('Content-Length', str(len(body))) + self.end_headers() + self.wfile.write(body) + return + + self.send_response(404) + self.end_headers() + + def log_message(self, _format, *_args): + return + + +class ApiClientTests(unittest.TestCase): + def test_generate_map_report_writes_artifacts(self): + server = TCPServer(('127.0.0.1', 0), _Handler) + port = server.server_address[1] + + thread = threading.Thread(target=server.serve_forever) + thread.daemon = True + thread.start() + + try: + with tempfile.TemporaryDirectory() as tmp: + output_path = os.path.join(tmp, 'out.dot') + model_path = os.path.join(tmp, 'model.json') + nodes_path = os.path.join(tmp, 'nodes.json') + + settings = { + 'api_timeout_ms': 2000, + 'map_index_mode': 'code', + 'map_collapse_default': 'none' + } + + report = api_client.generate_map_report( + 'http://127.0.0.1:{0}'.format(port), + '/repo', + settings, + 'repo', + '', + 'imports', + 'dot', + output_path, + model_path, + nodes_path + ) + + self.assertTrue(report.get('ok')) + self.assertEqual(report.get('format'), 'dot') + self.assertEqual(report.get('cacheKey'), 'test-cache-key') + + self.assertTrue(os.path.exists(output_path)) + self.assertTrue(os.path.exists(model_path)) + self.assertTrue(os.path.exists(nodes_path)) + + finally: + try: + server.shutdown() + except Exception: + pass + try: + server.server_close() + except Exception: + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/sublime/test_plugin.py b/tests/sublime/test_plugin.py new file mode 100644 index 000000000..d736657c5 --- /dev/null +++ 
b/tests/sublime/test_plugin.py @@ -0,0 +1,294 @@ +import importlib +import os +import sys +import tempfile +import types +import unittest + +BASE_SETTINGS = {} + + +def install_sublime_stubs(): + sublime = types.ModuleType('sublime') + + class DummySettings(object): + def __init__(self, values): + self._values = values + + def get(self, key, default=None): + return self._values.get(key, default) + + def load_settings(_name): + return DummySettings(BASE_SETTINGS) + + sublime.load_settings = load_settings + sublime.set_timeout = lambda fn, _delay=0: fn() + sublime.error_message = lambda _message: None + sublime.status_message = lambda _message: None + sublime.active_window = lambda: None + sublime.ENCODED_POSITION = 1 + + class Region(object): + def __init__(self, a, b): + self.a = a + self.b = b + + sublime.Region = Region + + sublime_plugin = types.ModuleType('sublime_plugin') + + class WindowCommand(object): + def __init__(self, window=None): + self.window = window + + sublime_plugin.WindowCommand = WindowCommand + + class TextCommand(object): + def __init__(self, view=None): + self.view = view + + sublime_plugin.TextCommand = TextCommand + + sys.modules['sublime'] = sublime + sys.modules['sublime_plugin'] = sublime_plugin + + +install_sublime_stubs() + +REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) +PACKAGE_ROOT = os.path.join(REPO_ROOT, 'sublime') +if PACKAGE_ROOT not in sys.path: + sys.path.insert(0, PACKAGE_ROOT) + +config = importlib.import_module('PairOfCleats.lib.config') +index_state = importlib.import_module('PairOfCleats.lib.index_state') +indexing = importlib.import_module('PairOfCleats.lib.indexing') +map_lib = importlib.import_module('PairOfCleats.lib.map') +map_state = importlib.import_module('PairOfCleats.lib.map_state') +paths = importlib.import_module('PairOfCleats.lib.paths') +search = importlib.import_module('PairOfCleats.lib.search') +results = importlib.import_module('PairOfCleats.lib.results') +watch = importlib.import_module('PairOfCleats.lib.watch') + + +class MockView(object): + def __init__(self, filename=None): + self._filename = filename + + def file_name(self): + return self._filename + + +class MockWindow(object): + def __init__(self, project_data=None, folders=None, view=None): + self._project_data = project_data or {} + self._folders = folders or [] + self._view = view + + def project_data(self): + return self._project_data + + def set_project_data(self, data): + self._project_data = data + + def folders(self): + return list(self._folders) + + def active_view(self): + return self._view + + def id(self): + return 1 + + +class DummyProcess(object): + def __init__(self, running=True): + self._running = running + + def poll(self): + return None if self._running else 0 + + +class DummyHandle(object): + def __init__(self, process): + self.process = process + self.cancelled = False + + def cancel(self): + self.cancelled = True + + +class SublimePluginTests(unittest.TestCase): + def setUp(self): + BASE_SETTINGS.clear() + BASE_SETTINGS.update(config.DEFAULT_SETTINGS) + + def test_find_repo_root_prefers_pairofcleats_json(self): + with tempfile.TemporaryDirectory() as root: + os.makedirs(os.path.join(root, 'src')) + open(os.path.join(root, '.pairofcleats.json'), 'w').close() + target = os.path.join(root, 'src', 'file.txt') + open(target, 'w').close() + + resolved = paths.find_repo_root(target) + self.assertEqual(resolved, root) + + def test_find_repo_root_git_fallback(self): + with tempfile.TemporaryDirectory() as root: + 
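+ # A bare .git directory (with no .pairofcleats.json marker) should still resolve to the repo root.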
os.makedirs(os.path.join(root, 'src')) + git_dir = os.path.join(root, '.git') + os.makedirs(git_dir) + target = os.path.join(root, 'src', 'file.txt') + open(target, 'w').close() + + resolved = paths.find_repo_root(target) + self.assertEqual(resolved, root) + + def test_resolve_cli_prefers_configured_path(self): + with tempfile.TemporaryDirectory() as root: + os.makedirs(os.path.join(root, 'bin')) + cli_path = os.path.join(root, 'bin', 'pairofcleats.js') + open(cli_path, 'w').close() + settings = dict(config.DEFAULT_SETTINGS) + settings['pairofcleats_path'] = 'bin/pairofcleats.js' + settings['node_path'] = '/usr/bin/node' + + resolved = paths.resolve_cli(settings, root) + self.assertEqual(resolved['command'], '/usr/bin/node') + self.assertEqual(resolved['args_prefix'], [cli_path]) + self.assertEqual(resolved['source'], 'settings') + + def test_resolve_cli_local_bin(self): + with tempfile.TemporaryDirectory() as root: + bin_dir = os.path.join(root, 'node_modules', '.bin') + os.makedirs(bin_dir) + local_cli = os.path.join(bin_dir, 'pairofcleats.cmd') + open(local_cli, 'w').close() + settings = dict(config.DEFAULT_SETTINGS) + + resolved = paths.resolve_cli(settings, root) + self.assertEqual(resolved['command'], local_cli) + self.assertEqual(resolved['args_prefix'], []) + self.assertEqual(resolved['source'], 'node_modules') + + def test_settings_merge_project_overrides(self): + BASE_SETTINGS['open_results_in'] = 'quick_panel' + BASE_SETTINGS['env'] = {'PAIROFCLEATS_CACHE_ROOT': 'A'} + project_data = { + 'settings': { + 'pairofcleats': { + 'open_results_in': 'output_panel', + 'env': { + 'PAIROFCLEATS_CACHE_ROOT': 'B' + } + } + } + } + window = MockWindow(project_data=project_data) + settings = config.get_settings(window) + + self.assertEqual(settings['open_results_in'], 'output_panel') + self.assertEqual(settings['env']['PAIROFCLEATS_CACHE_ROOT'], 'B') + + def test_validate_settings_reports_invalid_values(self): + settings = dict(config.DEFAULT_SETTINGS) + settings['index_mode_default'] = 'invalid' + settings['open_results_in'] = 'nowhere' + errors = config.validate_settings(settings) + self.assertTrue(errors) + + def test_build_search_args(self): + args = search.build_search_args( + 'alpha', + repo_root='/repo', + mode='code', + backend='memory', + limit=5, + explain=True + ) + self.assertIn('--json', args) + self.assertIn('--mode', args) + self.assertIn('--backend', args) + self.assertIn('--top', args) + self.assertIn('--explain', args) + self.assertIn('/repo', args) + + def test_map_output_dir_default(self): + with tempfile.TemporaryDirectory() as root: + settings = dict(config.DEFAULT_SETTINGS) + output_dir = map_lib.resolve_output_dir(root, settings) + expected = os.path.join(root, '.pairofcleats', 'maps') + self.assertEqual(output_dir, expected) + + def test_build_map_args(self): + settings = dict(config.DEFAULT_SETTINGS) + args = map_lib.build_map_args( + '/repo', + settings, + 'file', + 'src/app.js', + 'calls', + 'dot', + '/out.dot', + '/out.model.json', + '/out.nodes.json' + ) + self.assertIn('report', args) + self.assertIn('map', args) + self.assertIn('--scope', args) + self.assertIn('file', args) + self.assertIn('--include', args) + self.assertIn('calls', args) + + def test_record_last_map(self): + window = MockWindow() + payload = {'outPath': '/tmp/map.dot', 'format': 'dot'} + map_state.record_last_map(window, payload) + stored = map_state.get_last_map(window) + self.assertEqual(stored.get('format'), 'dot') + + def test_collect_hits_tolerates_partial_payload(self): + payload = { + 
'code': [{'file': 'src/a.py'}], + 'prose': None, + 'records': 'bad', + 'extractedProse': [{'file': 'docs/readme.md'}] + } + hits = results.collect_hits(payload) + files = [hit.get('file') for hit in hits] + self.assertIn('src/a.py', files) + self.assertIn('docs/readme.md', files) + + def test_record_last_build(self): + window = MockWindow(project_data={}) + state = index_state.record_last_build(window, 'code') + self.assertEqual(state.get('last_mode'), 'code') + stored = index_state.get_last_build(window) + self.assertEqual(stored.get('last_mode'), 'code') + + def test_build_index_args(self): + args = indexing.build_index_args('code', repo_root='/repo') + self.assertEqual(args[0:2], ['index', 'build']) + self.assertIn('--mode', args) + self.assertIn('--repo', args) + + def test_resolve_watch_root_folder_scope(self): + settings = dict(config.DEFAULT_SETTINGS) + settings['index_watch_scope'] = 'folder' + window = MockWindow(folders=['/workspace/sub']) + resolved = paths.resolve_watch_root(window, settings) + self.assertEqual(resolved, '/workspace/sub') + + def test_watch_gating(self): + window = MockWindow() + process = DummyProcess(running=True) + handle = DummyHandle(process) + watch.register(window, handle, '/repo') + self.assertTrue(watch.is_running(window)) + stopped = watch.stop(window) + self.assertTrue(stopped) + self.assertTrue(handle.cancelled) + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/subprocess-quoting.js b/tests/subprocess-quoting.js new file mode 100644 index 000000000..ba722b7c6 --- /dev/null +++ b/tests/subprocess-quoting.js @@ -0,0 +1,107 @@ +#!/usr/bin/env node +import http from 'node:http'; +import os from 'node:os'; +import path from 'node:path'; +import readline from 'node:readline'; +import fsPromises from 'node:fs/promises'; +import { spawn, spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const fixtureRoot = path.join(root, 'tests', 'fixtures', 'sample'); +const cacheRoot = path.join(root, 'tests', '.cache', 'subprocess-quoting'); +const serverPath = path.join(root, 'tools', 'api-server.js'); + +await fsPromises.rm(cacheRoot, { recursive: true, force: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +// Create a repo path containing spaces to catch quoting/arg-parsing bugs. 
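+// Both the mkdtemp parent below and the copied repo directory contain literal spaces,
+// so the build and api-server child processes spawned later would mis-resolve the repo
+// path if arguments were joined or re-parsed without proper quoting.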
+const repoParent = await fsPromises.mkdtemp(path.join(os.tmpdir(), 'pairofcleats repo with spaces ')); +const repoPath = path.join(repoParent, 'sample repo'); +await fsPromises.cp(fixtureRoot, repoPath, { recursive: true }); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const build = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoPath], + { env, stdio: 'inherit' } +); +if (build.status !== 0) { + console.error('subprocess-quoting test failed: build_index failed'); + process.exit(1); +} + +const server = spawn( + process.execPath, + [serverPath, '--repo', repoPath, '--host', '127.0.0.1', '--port', '0', '--json'], + { env, stdio: ['ignore', 'pipe', 'pipe'] } +); + +let stderr = ''; +server.stderr.on('data', (chunk) => { + stderr += chunk.toString(); +}); + +const rl = readline.createInterface({ input: server.stdout }); +const readStartup = () => new Promise((resolve, reject) => { + const timeout = setTimeout(() => reject(new Error('timeout waiting for api-server startup')), 15000); + rl.once('line', (line) => { + clearTimeout(timeout); + resolve(line); + }); +}); + +const requestJson = (baseUrl, pathname) => new Promise((resolve, reject) => { + const req = http.get(baseUrl + pathname, (res) => { + let data = ''; + res.on('data', (chunk) => { + data += chunk.toString(); + }); + res.on('end', () => { + try { + resolve({ status: res.statusCode || 0, body: JSON.parse(data || '{}') }); + } catch (err) { + reject(err); + } + }); + }); + req.on('error', reject); +}); + +let serverInfo = null; +try { + const line = await readStartup(); + serverInfo = JSON.parse(line || '{}'); + if (!serverInfo?.baseUrl) { + throw new Error('api-server did not report a baseUrl'); + } + + const health = await requestJson(serverInfo.baseUrl, '/health'); + if (!health.body?.ok) { + throw new Error('api-server /health failed'); + } + + const map = await requestJson(serverInfo.baseUrl, '/map?format=json'); + if (!map.body?.root?.path) { + throw new Error('api-server /map did not return a map model'); + } +} catch (err) { + console.error(err?.message || err); + if (stderr.trim()) { + console.error(stderr.trim()); + } + server.kill('SIGKILL'); + process.exit(1); +} finally { + try { + server.kill('SIGKILL'); + } catch (e) { + // ignore + } +} + +console.log('subprocess-quoting: ok'); diff --git a/tests/summary-report.js b/tests/summary-report.js index 920997b27..dc0452f8d 100644 --- a/tests/summary-report.js +++ b/tests/summary-report.js @@ -18,15 +18,22 @@ await fsPromises.cp(fixtureRoot, repoRoot, { recursive: true }); const env = { ...process.env, PAIROFCLEATS_CACHE_ROOT: cacheRoot, - PAIROFCLEATS_EMBEDDINGS: 'stub' + PAIROFCLEATS_EMBEDDINGS: 'stub', + PAIROFCLEATS_PROFILE: 'full' }; const result = spawnSync( process.execPath, [ path.join(root, 'tools', 'combined-summary.js'), + '--repo', + repoRoot, '--models', 'Xenova/all-MiniLM-L12-v2,Xenova/all-MiniLM-L6-v2', + '--limit', + '5', + '--top', + '3', '--no-ann', '--out', outPath diff --git a/tests/thread-limits.js b/tests/thread-limits.js new file mode 100644 index 000000000..e8dc72e97 --- /dev/null +++ b/tests/thread-limits.js @@ -0,0 +1,37 @@ +#!/usr/bin/env node +import { resolveThreadLimits } from '../src/shared/threads.js'; +import { planShardBatches } from '../src/index/build/shards.js'; + +const argv = { threads: 4 }; +const rawArgv = ['--threads', '4']; +const envConfig = {}; +const limits = resolveThreadLimits({ argv, rawArgv, envConfig, 
configConcurrency: null, importConcurrencyConfig: null }); + +if (limits.fileConcurrency !== 4) { + console.error(`thread limits test failed: fileConcurrency ${limits.fileConcurrency} !== 4`); + process.exit(1); +} +if (limits.cpuConcurrency !== limits.fileConcurrency) { + console.error('thread limits test failed: cpuConcurrency not equal fileConcurrency'); + process.exit(1); +} + +const items = [ + { id: 'a', weight: 8 }, + { id: 'b', weight: 7 }, + { id: 'c', weight: 6 }, + { id: 'd', weight: 5 } +]; +const batches = planShardBatches(items, 2, { resolveWeight: (item) => item.weight }); +if (batches.length !== 2) { + console.error(`thread limits test failed: expected 2 batches, got ${batches.length}`); + process.exit(1); +} +const sums = batches.map((batch) => batch.reduce((sum, item) => sum + item.weight, 0)); +const sorted = sums.slice().sort((a, b) => b - a); +if (sorted[0] !== 13 || sorted[1] !== 13) { + console.error(`thread limits test failed: batch sums ${sorted.join(',')} expected 13,13`); + process.exit(1); +} + +console.log('thread limits test passed'); diff --git a/tests/tokenization-buffering.js b/tests/tokenization-buffering.js new file mode 100644 index 000000000..bf0d74285 --- /dev/null +++ b/tests/tokenization-buffering.js @@ -0,0 +1,50 @@ +#!/usr/bin/env node +import { + createTokenizationBuffers, + createTokenizationContext, + tokenizeChunkText +} from '../src/index/build/tokenization.js'; + +const context = createTokenizationContext({ + dictWords: new Set(['alpha', 'beta']), + dictConfig: {}, + postingsConfig: {} +}); + +const input = { + text: 'function alphaBeta() { return alpha + beta; }', + mode: 'code', + ext: '.js', + context +}; + +const baseline = tokenizeChunkText(input); +const buffers = createTokenizationBuffers(); +const buffered = tokenizeChunkText({ ...input, buffers }); +const mutated = tokenizeChunkText({ + ...input, + text: 'const gamma = alpha + beta;', + buffers +}); +const bufferedAgain = tokenizeChunkText({ ...input, buffers }); + +const compare = (label, a, b) => { + if (JSON.stringify(a) !== JSON.stringify(b)) { + console.error(`Tokenization mismatch for ${label}`); + process.exit(1); + } +}; + +compare('tokens', baseline.tokens, buffered.tokens); +compare('seq', baseline.seq, buffered.seq); +compare('ngrams', baseline.ngrams, buffered.ngrams); +compare('chargrams', baseline.chargrams, buffered.chargrams); +compare('minhash', baseline.minhashSig, buffered.minhashSig); +if (JSON.stringify(mutated.tokens) === JSON.stringify(baseline.tokens)) { + console.error('Expected buffer reuse to handle different content.'); + process.exit(1); +} +compare('tokens (reuse)', baseline.tokens, bufferedAgain.tokens); +compare('minhash (reuse)', baseline.minhashSig, bufferedAgain.minhashSig); + +console.log('tokenization buffering test passed'); diff --git a/tests/tokenize-dictionary.js b/tests/tokenize-dictionary.js new file mode 100644 index 000000000..0c6f6c2af --- /dev/null +++ b/tests/tokenize-dictionary.js @@ -0,0 +1,24 @@ +#!/usr/bin/env node +import { splitWordsWithDict } from '../src/shared/tokenize.js'; + +const dict = new Set(['alpha', 'beta']); +const unknown = splitWordsWithDict('alphazzzbeta', dict, { segmentation: 'greedy' }); +if (unknown.join('|') !== 'alpha|zzz|beta') { + console.error(`Unexpected unknown span split: ${unknown.join('|')}`); + process.exit(1); +} + +const dpDict = new Set(['abc', 'ab', 'cd']); +const autoSegments = splitWordsWithDict('abcd', dpDict, { segmentation: 'auto', dpMaxTokenLength: 8 }); +if (autoSegments.join('|') !== 
'ab|cd') { + console.error(`Unexpected DP fallback split: ${autoSegments.join('|')}`); + process.exit(1); +} + +const ahoSegments = splitWordsWithDict('alphabeta', dict, { segmentation: 'aho' }); +if (ahoSegments.join('|') !== 'alpha|beta') { + console.error(`Unexpected Aho split: ${ahoSegments.join('|')}`); + process.exit(1); +} + +console.log('dictionary tokenization test passed'); diff --git a/tests/tool-root.js b/tests/tool-root.js new file mode 100644 index 000000000..90f916510 --- /dev/null +++ b/tests/tool-root.js @@ -0,0 +1,65 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'tool-root'); +const repoRoot = path.join(tempRoot, 'repo'); +const outsideRoot = path.join(tempRoot, 'outside'); +const cacheRoot = path.join(tempRoot, 'cache'); +const srcDir = path.join(repoRoot, 'src'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(srcDir, { recursive: true }); +await fsPromises.mkdir(outsideRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.writeFile( + path.join(srcDir, 'index.js'), + 'export function greet(name) {\n return `hi ${name}`;\n}\n', + 'utf8' +); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], + { cwd: outsideRoot, env, stdio: 'inherit' } +); +if (buildResult.status !== 0) { + console.error('Failed: build_index from outside repo root'); + process.exit(buildResult.status ?? 1); +} + +const searchResult = spawnSync( + process.execPath, + [path.join(root, 'search.js'), 'greet', '--json', '--no-ann', '--repo', repoRoot], + { cwd: outsideRoot, env, encoding: 'utf8' } +); +if (searchResult.status !== 0) { + console.error('Failed: search from outside repo root'); + console.error(searchResult.stderr || searchResult.stdout || ''); + process.exit(searchResult.status ?? 
1); +} + +let payload = null; +try { + payload = JSON.parse(searchResult.stdout || '{}'); +} catch { + console.error('Failed: search output was not JSON'); + process.exit(1); +} + +const hits = payload.code || []; +if (!hits.length) { + console.error('Failed: search returned no results'); + process.exit(1); +} + +console.log('Tool root outside-repo test passed'); diff --git a/tests/tooling-detect.js b/tests/tooling-detect.js index eed5e9bce..825672617 100644 --- a/tests/tooling-detect.js +++ b/tests/tooling-detect.js @@ -24,7 +24,7 @@ try { } const languages = payload.languages || {}; -const required = ['python', 'rust', 'go', 'java', 'cpp', 'objc']; +const required = ['python', 'rust', 'go', 'java', 'cpp', 'objc', 'swift']; for (const lang of required) { if (!languages[lang]) { console.error(`Missing detected language: ${lang}`); @@ -33,7 +33,7 @@ for (const lang of required) { } const toolIds = (payload.tools || []).map((tool) => tool.id); -const toolRequired = ['clangd', 'gopls', 'rust-analyzer', 'jdtls']; +const toolRequired = ['clangd', 'gopls', 'rust-analyzer', 'jdtls', 'sourcekit-lsp']; for (const tool of toolRequired) { if (!toolIds.includes(tool)) { console.error(`Missing tooling entry: ${tool}`); diff --git a/tests/tooling-lsp.js b/tests/tooling-lsp.js new file mode 100644 index 000000000..b9b76ab4d --- /dev/null +++ b/tests/tooling-lsp.js @@ -0,0 +1,126 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { PassThrough } from 'node:stream'; +import { buildLineIndex } from '../src/shared/lines.js'; +import { createFramedJsonRpcParser, writeFramedJsonRpc } from '../src/shared/jsonrpc.js'; +import { flattenSymbols } from '../src/integrations/tooling/lsp/symbols.js'; +import { rangeToOffsets } from '../src/integrations/tooling/lsp/positions.js'; + +const messages = []; +const errors = []; +const parser = createFramedJsonRpcParser({ + onMessage: (msg) => messages.push(msg), + onError: (err) => errors.push(err) +}); + +const waitFor = async (count) => { + for (let i = 0; i < 50; i += 1) { + if (messages.length >= count) return; + await new Promise((resolve) => setTimeout(resolve, 0)); + } + throw new Error(`Timed out waiting for ${count} messages.`); +}; + +const msgOne = { jsonrpc: '2.0', id: 1, result: 'ok' }; +const msgTwo = { jsonrpc: '2.0', method: 'notify', params: { ok: true } }; + +const frame = (payload) => { + const body = Buffer.from(JSON.stringify(payload), 'utf8'); + const header = `Content-Length: ${body.length}\r\n\r\n`; + return Buffer.concat([Buffer.from(header, 'utf8'), body]); +}; + +const combined = Buffer.concat([frame(msgOne), frame(msgTwo)]); +parser.push(combined.slice(0, 12)); +parser.push(combined.slice(12)); + +await waitFor(2); +assert.equal(errors.length, 0); +assert.equal(messages.length, 2); +assert.deepEqual(messages[0], msgOne); +assert.deepEqual(messages[1], msgTwo); + +const capture = new PassThrough(); +const capturedChunks = []; +capture.on('data', (chunk) => capturedChunks.push(chunk)); +await writeFramedJsonRpc(capture, msgOne); +const parserTwo = createFramedJsonRpcParser({ + onMessage: (msg) => messages.push(msg), + onError: (err) => errors.push(err) +}); +parserTwo.push(Buffer.concat(capturedChunks)); +await waitFor(3); +assert.deepEqual(messages[messages.length - 1], msgOne); + +const largeMessages = []; +const largeErrors = []; +const parserLarge = createFramedJsonRpcParser({ + onMessage: (msg) => largeMessages.push(msg), + onError: (err) => largeErrors.push(err) +}); +const largePayload = { + jsonrpc: '2.0', + id: 99, + 
result: 'x'.repeat(512 * 1024) +}; +const largeFrame = frame(largePayload); +for (let i = 0; i < largeFrame.length; i += 1024) { + parserLarge.push(largeFrame.slice(i, i + 1024)); +} +for (let i = 0; i < 50; i += 1) { + if (largeMessages.length) break; + await new Promise((resolve) => setTimeout(resolve, 0)); +} +assert.equal(largeErrors.length, 0); +assert.equal(largeMessages.length, 1); +assert.equal(largeMessages[0].id, 99); + +const docSymbols = [ + { + name: 'Widget', + kind: 5, + detail: 'class Widget', + range: { start: { line: 0, character: 0 }, end: { line: 4, character: 0 } }, + selectionRange: { start: { line: 0, character: 6 }, end: { line: 0, character: 12 } }, + children: [ + { + name: 'render', + kind: 6, + detail: 'func render()', + range: { start: { line: 1, character: 2 }, end: { line: 2, character: 0 } }, + selectionRange: { start: { line: 1, character: 2 }, end: { line: 1, character: 8 } } + } + ] + } +]; + +const flattenedDoc = flattenSymbols(docSymbols); +assert.equal(flattenedDoc.length, 2); +assert.equal(flattenedDoc[1].fullName, 'Widget.render'); + +const infoSymbols = [ + { + name: 'makeWidget', + kind: 12, + containerName: 'Factory', + location: { + uri: 'file:///tmp/example.swift', + range: { start: { line: 5, character: 0 }, end: { line: 7, character: 0 } } + } + } +]; + +const flattenedInfo = flattenSymbols(infoSymbols); +assert.equal(flattenedInfo.length, 1); +assert.equal(flattenedInfo[0].fullName, 'Factory.makeWidget'); + +const text = 'alpha\nbeta\ngamma'; +const lineIndex = buildLineIndex(text); +const offsets = rangeToOffsets(lineIndex, { + start: { line: 0, character: 1 }, + end: { line: 1, character: 2 } +}); +assert.equal(offsets.start, 1); +assert.equal(offsets.end, lineIndex[1] + 2); + +console.log('tooling LSP utils test passed'); diff --git a/tests/tree-sitter-chunks.js b/tests/tree-sitter-chunks.js new file mode 100644 index 000000000..bf52d007c --- /dev/null +++ b/tests/tree-sitter-chunks.js @@ -0,0 +1,89 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { buildTreeSitterChunks, preloadTreeSitterLanguages } from '../src/lang/tree-sitter.js'; + +const root = path.resolve('tests', 'fixtures', 'tree-sitter'); +const fixtures = [ + { id: 'swift', file: 'swift.swift', languageId: 'swift', expect: ['Widget', 'Widget.greet'] }, + { id: 'kotlin', file: 'kotlin.kt', languageId: 'kotlin', expect: ['Widget', 'Widget.greet'] }, + { id: 'csharp', file: 'csharp.cs', languageId: 'csharp', expect: ['Widget', 'Widget.Greet'] }, + { id: 'clike', file: 'clike.c', ext: '.c', expect: ['Widget', 'greet'] }, + { id: 'cpp', file: 'cpp.cpp', ext: '.cpp', expect: ['Widget', 'Widget.greet'] }, + { id: 'objc', file: 'objc.m', ext: '.m', expect: ['Widget', 'greet'] }, + { id: 'go', file: 'go.go', languageId: 'go', expect: ['Widget', 'Widget.Greet'] }, + { id: 'rust', file: 'rust.rs', languageId: 'rust', expect: ['Widget', 'Widget.greet'] }, + { id: 'java', file: 'java.java', languageId: 'java', expect: ['Widget', 'Widget.greet'] } +]; + +const preloadIds = fixtures + .map((fixture) => fixture.languageId + || (fixture.ext === '.c' ? 'clike' : null) + || (fixture.ext === '.cpp' ? 'cpp' : null) + || (fixture.ext === '.m' ? 
'objc' : null)) + .filter(Boolean); + +await preloadTreeSitterLanguages(preloadIds); + +const options = { treeSitter: { enabled: true }, log: () => {} }; + +const first = fixtures[0]; +const firstText = fs.readFileSync(path.join(root, first.file), 'utf8'); +const firstChunks = buildTreeSitterChunks({ + text: firstText, + languageId: first.languageId, + ext: first.ext, + options +}); + +if (!firstChunks || !firstChunks.length) { + console.log('tree-sitter not available; skipping tree-sitter chunk tests.'); + process.exit(0); +} + +const limitedByBytes = buildTreeSitterChunks({ + text: firstText, + languageId: first.languageId, + ext: first.ext, + options: { treeSitter: { enabled: true, maxBytes: 1 }, log: () => {} } +}); + +if (limitedByBytes !== null) { + throw new Error('expected tree-sitter to skip oversized file by maxBytes'); +} + +const limitedByLines = buildTreeSitterChunks({ + text: firstText, + languageId: first.languageId, + ext: first.ext, + options: { treeSitter: { enabled: true, maxLines: 1 }, log: () => {} } +}); + +if (limitedByLines !== null) { + throw new Error('expected tree-sitter to skip oversized file by maxLines'); +} + +const toNameSet = (chunks) => new Set(chunks.map((c) => c.name)); +const assertHas = (set, expected, label) => { + for (const name of expected) { + if (!set.has(name)) { + throw new Error(`${label} missing expected chunk name: ${name}`); + } + } +}; + +for (const fixture of fixtures) { + const text = fs.readFileSync(path.join(root, fixture.file), 'utf8'); + const chunks = buildTreeSitterChunks({ + text, + languageId: fixture.languageId, + ext: fixture.ext, + options + }) || []; + if (!chunks.length) { + throw new Error(`${fixture.id} tree-sitter chunks not found`); + } + const names = toNameSet(chunks); + assertHas(names, fixture.expect, fixture.id); +} + +console.log('tree-sitter chunk fixtures passed.'); diff --git a/tests/triage-records.js b/tests/triage-records.js index dfa0f07b0..2f5a83bd6 100644 --- a/tests/triage-records.js +++ b/tests/triage-records.js @@ -8,10 +8,25 @@ const root = process.cwd(); const repoRoot = path.join(root, 'tests', 'fixtures', 'sample'); const triageFixtureRoot = path.join(root, 'tests', 'fixtures', 'triage'); const cacheRoot = path.join(root, 'tests', '.cache', 'triage-records'); +const testLogRoot = process.env.PAIROFCLEATS_TEST_LOG_DIR + || process.env.npm_config_test_log_dir + || ''; +const resolvedTestLogRoot = testLogRoot ? 
path.resolve(testLogRoot) : ''; await fsPromises.rm(cacheRoot, { recursive: true, force: true }); await fsPromises.mkdir(cacheRoot, { recursive: true }); +async function writeTestLog(name, payload) { + if (!resolvedTestLogRoot) return; + const outPath = path.join(resolvedTestLogRoot, name); + try { + await fsPromises.mkdir(resolvedTestLogRoot, { recursive: true }); + await fsPromises.writeFile(outPath, JSON.stringify(payload, null, 2)); + } catch (err) { + console.warn(`Failed to write test log ${outPath}: ${err?.message || err}`); + } +} + const env = { ...process.env, PAIROFCLEATS_CACHE_ROOT: cacheRoot, @@ -123,6 +138,8 @@ const recordSearch = runJson('search-records', [ '--repo', repoRoot ], { cwd: repoRoot, env }); +await writeTestLog('triage-record-search.json', recordSearch); + if (!Array.isArray(recordSearch.records) || recordSearch.records.length === 0) { console.error('Record search returned no results.'); process.exit(1); @@ -150,6 +167,10 @@ if (!fs.existsSync(contextOut)) { } const pack = JSON.parse(await fsPromises.readFile(contextOut, 'utf8')); +await writeTestLog('triage-context-pack.json', pack); +await writeTestLog('triage-context-pack-evidence.json', pack.repoEvidence || {}); +await writeTestLog('triage-context-pack-history.json', { history: pack.history || [] }); + if (!pack.recordId || !pack.finding || !pack.repoEvidence) { console.error('Context pack missing required fields.'); process.exit(1); diff --git a/tests/truth-table.js b/tests/truth-table.js new file mode 100644 index 000000000..25a58366c --- /dev/null +++ b/tests/truth-table.js @@ -0,0 +1,81 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import path from 'node:path'; + +const root = process.cwd(); +const tablePath = path.join(root, 'docs', 'truth-table.md'); +let raw = ''; +try { + raw = fs.readFileSync(tablePath, 'utf8'); +} catch (err) { + console.error(`Failed to read truth table at ${tablePath}: ${err?.message || err}`); + process.exit(1); +} + +const lines = raw.split(/\r?\n/); +const claims = []; +let current = null; + +for (let i = 0; i < lines.length; i += 1) { + const line = lines[i]; + const trimmed = line.trim(); + if (trimmed.startsWith('- Claim:')) { + if (current) claims.push(current); + current = { line: i + 1, lines: [line] }; + continue; + } + if (current) { + if (trimmed.startsWith('## ') || trimmed.startsWith('# ')) { + claims.push(current); + current = null; + continue; + } + current.lines.push(line); + } +} +if (current) claims.push(current); + +if (!claims.length) { + console.error('Truth table validation failed: no claims found.'); + process.exit(1); +} + +const requiredLabels = ['Implementation:', 'Config:', 'Tests:', 'Limitations:']; +const issues = []; + +const findLabelLine = (blockLines, label) => { + for (const line of blockLines) { + if (line.includes(label)) return line; + } + return null; +}; + +for (const claim of claims) { + const blockText = claim.lines.join('\n'); + for (const label of requiredLabels) { + const line = findLabelLine(claim.lines, label); + if (!line) { + issues.push(`Claim at line ${claim.line} missing ${label}`); + continue; + } + const content = line.split(label)[1]; + if (!content || !content.trim()) { + issues.push(`Claim at line ${claim.line} has empty ${label}`); + } + } + const testsLine = findLabelLine(claim.lines, 'Tests:'); + if (testsLine && !/tests\//.test(testsLine)) { + issues.push(`Claim at line ${claim.line} Tests line missing tests/ reference`); + } + if (!testsLine && /Tests:/.test(blockText)) { + issues.push(`Claim at line 
${claim.line} has malformed Tests line`); + } +} + +if (issues.length) { + console.error('Truth table validation failed:'); + issues.forEach((issue) => console.error(`- ${issue}`)); + process.exit(1); +} + +console.log(`Truth table validation passed (${claims.length} claims).`); diff --git a/tests/ts-jsx-fixtures.js b/tests/ts-jsx-fixtures.js index f42c4dff8..e811ff8ee 100644 --- a/tests/ts-jsx-fixtures.js +++ b/tests/ts-jsx-fixtures.js @@ -1,7 +1,7 @@ #!/usr/bin/env node import fs from 'node:fs'; import path from 'node:path'; -import { buildJsChunks } from '../src/lang/javascript.js'; +import { buildJsChunks, collectImports } from '../src/lang/javascript.js'; import { buildTypeScriptChunks, collectTypeScriptImports } from '../src/lang/typescript.js'; const root = process.cwd(); @@ -50,4 +50,25 @@ if (!jsxHasApp || !jsxHasButton) { process.exit(1); } -console.log('TS/JSX fixture parsing tests passed'); +const flowText = readFixture('javascript_flow.js'); +const flowChunks = buildJsChunks(flowText, { + ext: '.js', + javascript: { parser: 'babel', flow: 'auto' }, + flowMode: 'auto' +}) || []; +const flowHasGreet = flowChunks.some((chunk) => chunk.name === 'greet'); +if (!flowHasGreet) { + console.error('Expected Flow chunks for greet.'); + process.exit(1); +} +const flowImports = collectImports(flowText, { + ext: '.js', + javascript: { parser: 'babel', flow: 'auto' }, + flowMode: 'auto' +}); +if (!flowImports.includes('flow-parser') || !flowImports.includes('./types')) { + console.error('Missing Flow imports in JS parsing.'); + process.exit(1); +} + +console.log('TS/JSX/Flow fixture parsing tests passed'); diff --git a/tests/two-stage-state.js b/tests/two-stage-state.js new file mode 100644 index 000000000..a951b6620 --- /dev/null +++ b/tests/two-stage-state.js @@ -0,0 +1,114 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, getRepoCacheRoot, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'two-stage-state'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); + +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify({ + indexing: { + twoStage: { + enabled: true, + stage2: { + embeddings: { enabled: false, mode: 'off' } + } + }, + treeSitter: { enabled: false } + } + }, null, 2) +); +await fsPromises.writeFile(path.join(repoRoot, 'alpha.js'), 'const alpha = 1;\n'); + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub' +}; + +const runBuild = (label, args) => { + const result = spawnSync(process.execPath, args, { cwd: repoRoot, env, stdio: 'inherit' }); + if (result.status !== 0) { + console.error(`Failed: ${label}`); + process.exit(result.status ?? 
1); + } +}; + +runBuild('stage1', [path.join(root, 'build_index.js'), '--stub-embeddings', '--stage', 'stage1', '--repo', repoRoot]); + +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +const userConfig = loadUserConfig(repoRoot); +const resolveStagePaths = () => { + const codeDir = getIndexDir(repoRoot, 'code', userConfig); + return { + codeDir, + statePath: path.join(codeDir, 'index_state.json'), + relationsPath: path.join(codeDir, 'file_relations.json'), + densePath: path.join(codeDir, 'dense_vectors_uint8.json') + }; +}; +let { codeDir, statePath, relationsPath, densePath } = resolveStagePaths(); +if (!fs.existsSync(statePath)) { + console.error('Missing index_state.json after stage1'); + process.exit(1); +} +const stateStage1 = JSON.parse(await fsPromises.readFile(statePath, 'utf8')); +if (stateStage1.stage !== 'stage1' || stateStage1.enrichment?.pending !== true) { + console.error('Expected stage1 index_state to show pending enrichment'); + process.exit(1); +} +if (fs.existsSync(relationsPath)) { + console.error('Did not expect file_relations.json after stage1'); + process.exit(1); +} + +const repoCacheRoot = getRepoCacheRoot(repoRoot, userConfig); +const enrichmentPath = path.join(repoCacheRoot, 'enrichment_state.json'); +const enrichmentStage1 = JSON.parse(await fsPromises.readFile(enrichmentPath, 'utf8')); +if (enrichmentStage1.status !== 'pending') { + console.error('Expected enrichment_state pending after stage1'); + process.exit(1); +} + +runBuild('stage2', [path.join(root, 'build_index.js'), '--stub-embeddings', '--stage', 'stage2', '--repo', repoRoot]); + +({ codeDir, statePath, relationsPath, densePath } = resolveStagePaths()); +const stateStage2 = JSON.parse(await fsPromises.readFile(statePath, 'utf8')); +if (stateStage2.stage !== 'stage2' || stateStage2.enrichment?.pending === true) { + console.error('Expected stage2 index_state to clear pending enrichment'); + process.exit(1); +} +if (!fs.existsSync(relationsPath)) { + console.error('Expected file_relations.json after stage2'); + process.exit(1); +} +const enrichmentStage2 = JSON.parse(await fsPromises.readFile(enrichmentPath, 'utf8')); +if (enrichmentStage2.status !== 'done') { + console.error('Expected enrichment_state done after stage2'); + process.exit(1); +} + +runBuild('stage3', [path.join(root, 'build_index.js'), '--stub-embeddings', '--stage', 'stage3', '--repo', repoRoot]); + +({ codeDir, statePath, relationsPath, densePath } = resolveStagePaths()); +const stateStage3 = JSON.parse(await fsPromises.readFile(statePath, 'utf8')); +if (stateStage3.embeddings?.ready !== true) { + console.error('Expected stage3 to mark embeddings ready'); + process.exit(1); +} +if (!fs.existsSync(densePath)) { + console.error('Expected dense_vectors_uint8.json after stage3'); + process.exit(1); +} + +console.log('two-stage state test passed'); diff --git a/tests/type-inference-clangd-provider-no-clangd.js b/tests/type-inference-clangd-provider-no-clangd.js new file mode 100644 index 000000000..23eb36f09 --- /dev/null +++ b/tests/type-inference-clangd-provider-no-clangd.js @@ -0,0 +1,47 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { collectClangdTypes } from '../src/index/tooling/clangd-provider.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'clangd-provider-no-clangd'); +const repoRoot = path.join(tempRoot, 'repo'); +const srcDir = path.join(repoRoot, 'src'); + +await fs.rm(tempRoot, { recursive: true, force: true }); +await 
fs.mkdir(srcDir, { recursive: true }); +await fs.writeFile( + path.join(srcDir, 'sample.c'), + 'int add(int a, int b) { return a + b; }\n' +); + +const chunksByFile = new Map([ + ['src/sample.c', [{ file: 'src/sample.c', name: 'add', start: 0, end: 10, docmeta: {} }]] +]); + +const logs = []; +const log = (msg) => logs.push(String(msg)); + +const result = await collectClangdTypes({ + rootDir: repoRoot, + chunksByFile, + log, + cmd: 'clangd-does-not-exist' +}); + +if (!result || !(result.typesByChunk instanceof Map)) { + console.error('clangd provider did not return a types map.'); + process.exit(1); +} + +if (result.typesByChunk.size !== 0) { + console.error('clangd provider should return empty map when clangd is missing.'); + process.exit(1); +} + +if (!logs.some((entry) => entry.includes('clangd not detected'))) { + console.error('clangd provider missing expected fallback log message.'); + process.exit(1); +} + +console.log('clangd provider fallback test passed'); diff --git a/tests/type-inference-crossfile-go.js b/tests/type-inference-crossfile-go.js index 76e57877f..87b165315 100644 --- a/tests/type-inference-crossfile-go.js +++ b/tests/type-inference-crossfile-go.js @@ -8,6 +8,19 @@ import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; const root = process.cwd(); const tempRoot = path.join(root, 'tests', '.cache', 'type-inference-crossfile-go'); const repoRoot = path.join(tempRoot, 'repo'); +const hasPython = () => { + const candidates = ['python', 'python3']; + for (const candidate of candidates) { + try { + const result = spawnSync(candidate, ['-c', 'import sys; sys.stdout.write("ok")'], { + encoding: 'utf8' + }); + if (result.status === 0 && String(result.stdout || '').trim() === 'ok') return true; + } catch {} + } + return false; +}; +const pythonAvailable = hasPython(); await fsPromises.rm(tempRoot, { recursive: true, force: true }); await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true }); @@ -98,6 +111,27 @@ public class JavaWidgetBuilder { ` ); +if (pythonAvailable) { + await fsPromises.writeFile( + path.join(repoRoot, 'src', 'py_widget.py'), + `class PyWidget: + def __init__(self): + self.id = 1 + +def make_py_widget() -> PyWidget: + return PyWidget() +` + ); + + await fsPromises.writeFile( + path.join(repoRoot, 'src', 'py_builder.py'), + `from py_widget import make_py_widget, PyWidget + +def build_py_widget() -> PyWidget: + return make_py_widget() +` + ); +} const env = { ...process.env, PAIROFCLEATS_CACHE_ROOT: path.join(tempRoot, 'cache'), @@ -125,9 +159,17 @@ if (!fs.existsSync(chunkMetaPath)) { } const chunkMeta = JSON.parse(fs.readFileSync(chunkMetaPath, 'utf8')); +const fileMetaPath = path.join(codeDir, 'file_meta.json'); +const fileMeta = fs.existsSync(fileMetaPath) + ? JSON.parse(fs.readFileSync(fileMetaPath, 'utf8')) + : []; +const fileById = new Map( + (Array.isArray(fileMeta) ? 
fileMeta : []).map((entry) => [entry.id, entry.file]) +); +const resolveChunkFile = (chunk) => chunk?.file || fileById.get(chunk?.fileId) || null; const buildGo = chunkMeta.find((chunk) => - chunk.file === 'src/builder.go' && + resolveChunkFile(chunk) === 'src/builder.go' && chunk.name === 'BuildGoWidget' ); if (!buildGo) { @@ -142,7 +184,7 @@ if (!inferredGo.some((entry) => entry.type === 'GoWidget' && entry.source === 'f } const buildRust = chunkMeta.find((chunk) => - chunk.file === 'src/lib.rs' && + resolveChunkFile(chunk) === 'src/lib.rs' && chunk.name === 'build_rust_widget' ); if (!buildRust) { @@ -157,7 +199,7 @@ if (!inferredRust.some((entry) => entry.type === 'RustWidget' && entry.source == } const buildJava = chunkMeta.find((chunk) => - chunk.file === 'src/JavaWidgetBuilder.java' && + resolveChunkFile(chunk) === 'src/JavaWidgetBuilder.java' && chunk.name === 'JavaWidgetBuilder.buildWidget' ); if (!buildJava) { @@ -171,4 +213,22 @@ if (!inferredJava.some((entry) => entry.type === 'JavaWidget' && entry.source == process.exit(1); } -console.log('Cross-file inference tests passed (Go/Rust/Java).'); +if (pythonAvailable) { + const buildPy = chunkMeta.find((chunk) => + resolveChunkFile(chunk) === 'src/py_builder.py' && + chunk.name === 'build_py_widget' + ); + if (!buildPy) { + console.error('Missing build_py_widget chunk in py_builder.py.'); + process.exit(1); + } + const inferredPy = buildPy.docmeta?.inferredTypes?.returns || []; + if (!inferredPy.some((entry) => entry.type === 'PyWidget' && entry.source === 'flow')) { + console.error('Python cross-file inference missing return type PyWidget for build_py_widget.'); + process.exit(1); + } +} else { + console.log('Skipping Python cross-file inference (python not available).'); +} + +console.log('Cross-file inference tests passed (Go/Rust/Java/Python).'); diff --git a/tests/type-inference-crossfile.js b/tests/type-inference-crossfile.js index 2c1092ca6..108e94dca 100644 --- a/tests/type-inference-crossfile.js +++ b/tests/type-inference-crossfile.js @@ -4,6 +4,7 @@ import fsPromises from 'node:fs/promises'; import path from 'node:path'; import { spawnSync } from 'node:child_process'; import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; +import { applyCrossFileInference } from '../src/index/type-inference-crossfile.js'; const root = process.cwd(); const tempRoot = path.join(root, 'tests', '.cache', 'type-inference-crossfile'); @@ -11,12 +12,224 @@ const repoRoot = path.join(tempRoot, 'repo'); await fsPromises.rm(tempRoot, { recursive: true, force: true }); await fsPromises.mkdir(path.join(repoRoot, 'src'), { recursive: true }); +const graphsFixtureRoot = path.join(root, 'tests', 'fixtures', 'graphs', 'simple'); +const graphsTargetRoot = path.join(repoRoot, 'src', 'graphs'); +await fsPromises.mkdir(graphsTargetRoot, { recursive: true }); +await fsPromises.copyFile( + path.join(graphsFixtureRoot, 'producer.js'), + path.join(graphsTargetRoot, 'producer.js') +); +await fsPromises.copyFile( + path.join(graphsFixtureRoot, 'consumer.js'), + path.join(graphsTargetRoot, 'consumer.js') +); + +const statsRoot = path.join(tempRoot, 'stats'); +await fsPromises.mkdir(statsRoot, { recursive: true }); + +const writeScenarioFile = async (rootDir, relPath, contents) => { + const absPath = path.join(rootDir, relPath); + await fsPromises.mkdir(path.dirname(absPath), { recursive: true }); + await fsPromises.writeFile(absPath, contents); + return absPath; +}; + +const runStatsScenario = async (name, { files, chunks, expect }) => { + const 
scenarioRoot = path.join(statsRoot, name); + await fsPromises.rm(scenarioRoot, { recursive: true, force: true }); + await fsPromises.mkdir(scenarioRoot, { recursive: true }); + for (const [relPath, contents] of Object.entries(files)) { + await writeScenarioFile(scenarioRoot, relPath, contents); + } + const stats = await applyCrossFileInference({ + rootDir: scenarioRoot, + chunks, + enabled: true, + log: () => {}, + useTooling: false, + enableTypeInference: true, + enableRiskCorrelation: true, + fileRelations: null + }); + const entries = [ + ['linkedCalls', stats.linkedCalls, expect.linkedCalls], + ['linkedUsages', stats.linkedUsages, expect.linkedUsages], + ['inferredReturns', stats.inferredReturns, expect.inferredReturns], + ['riskFlows', stats.riskFlows, expect.riskFlows] + ]; + for (const [label, actual, expected] of entries) { + if (actual !== expected) { + console.error( + `Cross-file inference stats mismatch (${name}): ${label}=${actual}, expected ${expected}.` + ); + process.exit(1); + } + } +}; + +const zeroContent = 'export function noop() { const x = 1; }\n'; +await runStatsScenario('zero', { + files: { + 'src/zero.js': zeroContent + }, + chunks: [ + { + file: 'src/zero.js', + name: 'noop', + kind: 'function', + start: 0, + end: zeroContent.length, + docmeta: { returnsValue: false }, + codeRelations: {} + } + ], + expect: { + linkedCalls: 0, + linkedUsages: 0, + inferredReturns: 0, + riskFlows: 0 + } +}); + +const creatorContent = [ + 'export function makeWidget() { return {}; }', + 'export class Widget {}', + '' +].join('\n'); +const oneConsumerContent = 'export function buildWidget() { return makeWidget(); }\n'; +await runStatsScenario('one-each', { + files: { + 'src/creator.js': creatorContent, + 'src/consumer.js': oneConsumerContent + }, + chunks: [ + { + file: 'src/consumer.js', + name: 'buildWidget', + kind: 'function', + start: 0, + end: oneConsumerContent.length, + docmeta: { + returnsValue: true, + risk: { sources: [{ name: 'source', ruleId: 'rule-source', confidence: 0.8 }] } + }, + codeRelations: { + calls: [['buildWidget', 'makeWidget']], + usages: ['Widget'] + } + }, + { + file: 'src/creator.js', + name: 'makeWidget', + kind: 'function', + start: 0, + end: creatorContent.length, + docmeta: { + returnType: 'Widget', + returnsValue: false, + risk: { + sinks: [{ name: 'sink', ruleId: 'rule-sink', category: 'test', severity: 'high', tags: ['taint'] }] + } + }, + codeRelations: {} + }, + { + file: 'src/creator.js', + name: 'Widget', + kind: 'class', + start: 0, + end: creatorContent.length, + docmeta: {}, + codeRelations: {} + } + ], + expect: { + linkedCalls: 1, + linkedUsages: 1, + inferredReturns: 1, + riskFlows: 1 + } +}); + +const secondConsumerContent = 'export function buildWidgetTwo() { return makeWidget(); }\n'; +await runStatsScenario('couple-each', { + files: { + 'src/creator.js': creatorContent, + 'src/consumer-one.js': oneConsumerContent, + 'src/consumer-two.js': secondConsumerContent + }, + chunks: [ + { + file: 'src/consumer-one.js', + name: 'buildWidget', + kind: 'function', + start: 0, + end: oneConsumerContent.length, + docmeta: { + returnsValue: true, + risk: { sources: [{ name: 'source', ruleId: 'rule-source', confidence: 0.8 }] } + }, + codeRelations: { + calls: [['buildWidget', 'makeWidget']], + usages: ['Widget'] + } + }, + { + file: 'src/consumer-two.js', + name: 'buildWidgetTwo', + kind: 'function', + start: 0, + end: secondConsumerContent.length, + docmeta: { + returnsValue: true, + risk: { sources: [{ name: 'source', ruleId: 'rule-source', 
confidence: 0.8 }] } + }, + codeRelations: { + calls: [['buildWidgetTwo', 'makeWidget']], + usages: ['Widget'] + } + }, + { + file: 'src/creator.js', + name: 'makeWidget', + kind: 'function', + start: 0, + end: creatorContent.length, + docmeta: { + returnType: 'Widget', + returnsValue: false, + risk: { + sinks: [{ name: 'sink', ruleId: 'rule-sink', category: 'test', severity: 'high', tags: ['taint'] }] + } + }, + codeRelations: {} + }, + { + file: 'src/creator.js', + name: 'Widget', + kind: 'class', + start: 0, + end: creatorContent.length, + docmeta: {}, + codeRelations: {} + } + ], + expect: { + linkedCalls: 2, + linkedUsages: 2, + inferredReturns: 2, + riskFlows: 2 + } +}); const config = { indexing: { typeInference: true, typeInferenceCrossFile: true }, + tooling: { + autoEnableOnDetect: false + }, sqlite: { use: false } }; await fsPromises.writeFile( @@ -63,9 +276,19 @@ process.env.PAIROFCLEATS_EMBEDDINGS = env.PAIROFCLEATS_EMBEDDINGS; const result = spawnSync(process.execPath, [path.join(root, 'build_index.js'), '--stub-embeddings', '--repo', repoRoot], { cwd: repoRoot, env, + timeout: Number.isFinite(Number(process.env.PAIROFCLEATS_TEST_TIMEOUT_MS)) + ? Math.max(1000, Number(process.env.PAIROFCLEATS_TEST_TIMEOUT_MS)) + : 120000, + killSignal: 'SIGTERM', stdio: 'inherit' }); if (result.status !== 0) { + if (result.signal) { + console.error(`Cross-file inference test failed: build_index terminated by ${result.signal}.`); + } + if (result.error) { + console.error(`Cross-file inference test failed: ${result.error.message || result.error}.`); + } console.error('Cross-file inference test failed: build_index failed.'); process.exit(result.status ?? 1); } @@ -79,8 +302,17 @@ if (!fs.existsSync(chunkMetaPath)) { } const chunkMeta = JSON.parse(fs.readFileSync(chunkMetaPath, 'utf8')); +const fileMetaPath = path.join(codeDir, 'file_meta.json'); +const fileMeta = fs.existsSync(fileMetaPath) + ? JSON.parse(fs.readFileSync(fileMetaPath, 'utf8')) + : []; +const fileById = new Map( + (Array.isArray(fileMeta) ? 
fileMeta : []).map((entry) => [entry.id, entry.file]) +); +const resolveChunkFile = (chunk) => chunk?.file || fileById.get(chunk?.fileId) || null; + const buildWidget = chunkMeta.find((chunk) => - chunk.file === 'src/consumer.js' && + resolveChunkFile(chunk) === 'src/consumer.js' && chunk.name === 'buildWidget' ); if (!buildWidget) { @@ -118,4 +350,25 @@ if (!usageLinks.some((link) => link.target === 'Widget' && link.file === 'src/cr process.exit(1); } +const graphPath = path.join(codeDir, 'graph_relations.json'); +if (!fs.existsSync(graphPath)) { + console.error(`Missing graph relations at ${graphPath}`); + process.exit(1); +} +const graphRelations = JSON.parse(fs.readFileSync(graphPath, 'utf8')); +const findNode = (graph, id) => (graph?.nodes || []).find((node) => node.id === id); +const graphConsumer = 'src/graphs/consumer.js::buildGraphWidget'; +const graphProducerFn = 'src/graphs/producer.js::createGraphWidget'; +const graphProducerType = 'src/graphs/producer.js::GraphWidget'; +const callNode = findNode(graphRelations.callGraph, graphConsumer); +if (!callNode || !Array.isArray(callNode.out) || !callNode.out.includes(graphProducerFn)) { + console.error('Graph relations missing call link for fixture consumer.'); + process.exit(1); +} +const usageNode = findNode(graphRelations.usageGraph, graphConsumer); +if (!usageNode || !Array.isArray(usageNode.out) || !usageNode.out.includes(graphProducerType)) { + console.error('Graph relations missing usage link for fixture consumer.'); + process.exit(1); +} + console.log('Cross-file inference test passed'); diff --git a/tests/type-inference-crossfile/apply.test.js b/tests/type-inference-crossfile/apply.test.js new file mode 100644 index 000000000..b3f0baf92 --- /dev/null +++ b/tests/type-inference-crossfile/apply.test.js @@ -0,0 +1,48 @@ +#!/usr/bin/env node +import { addInferredParam, addInferredReturn, mergeDiagnostics } from '../../src/index/type-inference-crossfile/apply.js'; + +const fail = (message) => { + console.error(message); + process.exit(1); +}; + +const docmeta = {}; +addInferredReturn(docmeta, 'Widget', 'flow', 0.4); +addInferredReturn(docmeta, 'Widget', 'flow', 0.8); +addInferredReturn(docmeta, 'Widget', 'tooling', 0.2); +const returns = docmeta.inferredTypes?.returns || []; +if (returns.length !== 2) { + fail('addInferredReturn should dedupe entries by type/source.'); +} +const flowEntry = returns.find((entry) => entry.source === 'flow'); +if (!flowEntry || flowEntry.confidence !== 0.8) { + fail('addInferredReturn should keep max confidence for repeated entries.'); +} + +const paramMeta = {}; +if (!addInferredParam(paramMeta, 'arg', 'string', 'flow', 0.6)) { + fail('addInferredParam should accept first param type.'); +} +addInferredParam(paramMeta, 'arg', 'string', 'flow', 0.2); +const params = paramMeta.inferredTypes?.params?.arg || []; +if (params.length !== 1) { + fail('addInferredParam should dedupe entries by type/source.'); +} +if (params[0].confidence !== 0.6) { + fail('addInferredParam should keep max confidence for repeated entries.'); +} +if (addInferredParam(paramMeta, 'arg', 'number', 'flow', 0.5, 1)) { + fail('addInferredParam should respect maxCandidates limit.'); +} + +const target = new Map([['a', [{ message: 'one' }]]]); +const incoming = new Map([ + ['a', [{ message: 'two' }]], + ['b', [{ message: 'three' }]] +]); +mergeDiagnostics(target, incoming); +if (target.get('a')?.length !== 2 || target.get('b')?.length !== 1) { + fail('mergeDiagnostics should append incoming diagnostics.'); +} + 
+console.log('type-inference-crossfile apply tests passed'); diff --git a/tests/type-inference-crossfile/extract.test.js b/tests/type-inference-crossfile/extract.test.js new file mode 100644 index 000000000..e10995fc0 --- /dev/null +++ b/tests/type-inference-crossfile/extract.test.js @@ -0,0 +1,77 @@ +#!/usr/bin/env node +import { extractParamTypes, extractReturnCalls, extractReturnTypes, inferArgType } from '../../src/index/type-inference-crossfile/extract.js'; + +const fail = (message) => { + console.error(message); + process.exit(1); +}; + +const chunk = { + name: 'Widget', + kind: 'class', + docmeta: { + returnType: 'Widget', + returns: ['Widget', 'Gadget'], + inferredTypes: { + returns: [{ type: 'Thing' }, { type: 'Widget', source: 'flow' }], + params: { + b: [{ type: 'number' }, { type: 'number' }], + a: [{ type: 'boolean' }] + } + }, + params: ['a', 'b'], + paramTypes: { a: 'string' } + } +}; + +const returnTypes = extractReturnTypes(chunk); +const returnSet = new Set(returnTypes); +if (!returnSet.has('Widget') || !returnSet.has('Gadget') || !returnSet.has('Thing')) { + fail('extractReturnTypes should collect explicit and inferred return types.'); +} +if (returnTypes.length !== returnSet.size) { + fail('extractReturnTypes should dedupe return types.'); +} + +const { paramNames, paramTypes } = extractParamTypes(chunk); +if (paramNames.join(',') !== 'a,b') { + fail('extractParamTypes should preserve param name order.'); +} +const paramA = new Set(paramTypes.a || []); +const paramB = new Set(paramTypes.b || []); +if (!paramA.has('string') || !paramA.has('boolean')) { + fail('extractParamTypes should merge declared and inferred param types.'); +} +if (!paramB.has('number') || paramB.size !== 1) { + fail('extractParamTypes should dedupe inferred param types.'); +} + +const callText = [ + 'return createWidget();', + 'return await ns.Factory.build();', + 'return new Widget();' +].join('\n'); +const { calls, news } = extractReturnCalls(callText); +if (!calls.has('createWidget') || !calls.has('ns.Factory.build')) { + fail('extractReturnCalls should collect return call targets.'); +} +if (!news.has('Widget') || news.size !== 1) { + fail('extractReturnCalls should collect return new targets.'); +} + +const argChecks = [ + ['123', 'number'], + ['true', 'boolean'], + ['"hello"', 'string'], + ['[1, 2]', 'array'], + ['{ a: 1 }', 'object'], + ['new Gadget()', 'Gadget'], + ['fn(...)', 'function'] +]; +for (const [value, expected] of argChecks) { + if (inferArgType(value) !== expected) { + fail(`inferArgType should infer ${expected} from ${value}.`); + } +} + +console.log('type-inference-crossfile extract tests passed'); diff --git a/tests/type-inference-crossfile/symbols.test.js b/tests/type-inference-crossfile/symbols.test.js new file mode 100644 index 000000000..58408d475 --- /dev/null +++ b/tests/type-inference-crossfile/symbols.test.js @@ -0,0 +1,42 @@ +#!/usr/bin/env node +import { addSymbol, leafName, resolveUniqueSymbol, isTypeDeclaration } from '../../src/index/type-inference-crossfile/symbols.js'; + +const fail = (message) => { + console.error(message); + process.exit(1); +}; + +if (leafName('Alpha::Beta.Gamma') !== 'Gamma') { + fail('leafName should return the last segment.'); +} + +if (!isTypeDeclaration('Class')) { + fail('isTypeDeclaration should match class-like kinds.'); +} + +if (isTypeDeclaration('function')) { + fail('isTypeDeclaration should ignore non-type kinds.'); +} + +const directIndex = new Map(); +const directEntry = { name: 'Widget', file: 'src/widget.js', kind: 'class' }; 
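+// A symbol registered once under its exact name should resolve directly to that entry.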
+addSymbol(directIndex, directEntry.name, directEntry); +if (resolveUniqueSymbol(directIndex, 'Widget') !== directEntry) { + fail('resolveUniqueSymbol should resolve direct unique matches.'); +} + +const leafIndex = new Map(); +const leafEntry = { name: 'Namespace.Widget', file: 'src/ns.js', kind: 'class' }; +addSymbol(leafIndex, 'Widget', leafEntry); +if (resolveUniqueSymbol(leafIndex, 'Namespace.Widget') !== leafEntry) { + fail('resolveUniqueSymbol should resolve unique leaf matches.'); +} + +const dupeIndex = new Map(); +addSymbol(dupeIndex, 'Dup', { name: 'Dup', file: 'src/one.js' }); +addSymbol(dupeIndex, 'Dup', { name: 'Dup', file: 'src/two.js' }); +if (resolveUniqueSymbol(dupeIndex, 'Dup') !== null) { + fail('resolveUniqueSymbol should return null for ambiguous matches.'); +} + +console.log('type-inference-crossfile symbols tests passed'); diff --git a/tests/type-inference-lsp-enrichment.js b/tests/type-inference-lsp-enrichment.js new file mode 100644 index 000000000..ec3adfe50 --- /dev/null +++ b/tests/type-inference-lsp-enrichment.js @@ -0,0 +1,144 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { getIndexDir, loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'lsp-enrichment'); +const repoRoot = path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); +const srcDir = path.join(repoRoot, 'src'); +const binRoot = path.join(root, 'tests', 'fixtures', 'lsp', 'bin'); + +await fsPromises.rm(tempRoot, { recursive: true, force: true }); +await fsPromises.mkdir(srcDir, { recursive: true }); + +const cppSource = 'int add(int a, int b) { return a + b; }\n'; +const swiftSource = 'func greet(name: String, count: Int) -> String { return "hi" }\n'; +const pythonSource = 'def greet(name: str) -> str:\n return "hi"\n'; +await fsPromises.writeFile(path.join(srcDir, 'sample.cpp'), cppSource); +await fsPromises.writeFile(path.join(srcDir, 'sample.swift'), swiftSource); +await fsPromises.writeFile(path.join(srcDir, 'sample.py'), pythonSource); + +const config = { + indexing: { + typeInference: true, + typeInferenceCrossFile: true + }, + sqlite: { + use: false + }, + tooling: { + autoEnableOnDetect: true + } +}; +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify(config, null, 2) +); + +for (const binName of ['clangd', 'sourcekit-lsp', 'pyright-langserver']) { + try { + await fsPromises.chmod(path.join(binRoot, binName), 0o755); + } catch {} +} + +const env = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_EMBEDDINGS: 'stub', + PATH: `${binRoot}${path.delimiter}${process.env.PATH || ''}` +}; +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + +const buildResult = spawnSync( + process.execPath, + [path.join(root, 'build_index.js'), '--repo', repoRoot, '--stub-embeddings'], + { env, encoding: 'utf8' } +); + +if (buildResult.status !== 0) { + console.error('LSP enrichment test failed: build_index error.'); + if (buildResult.stderr) console.error(buildResult.stderr.trim()); + process.exit(buildResult.status ?? 
1); +} + +const userConfig = loadUserConfig(repoRoot); +const indexDir = getIndexDir(repoRoot, 'code', userConfig); +const metaPath = path.join(indexDir, 'chunk_meta.json'); +if (!fs.existsSync(metaPath)) { + console.error('LSP enrichment test failed: chunk_meta.json missing.'); + process.exit(1); +} + +const chunks = JSON.parse(fs.readFileSync(metaPath, 'utf8')); +const fileMetaPath = path.join(indexDir, 'file_meta.json'); +const fileMeta = fs.existsSync(fileMetaPath) + ? JSON.parse(fs.readFileSync(fileMetaPath, 'utf8')) + : []; +const fileById = new Map( + (Array.isArray(fileMeta) ? fileMeta : []).map((entry) => [entry.id, entry.file]) +); +const resolveChunkFile = (chunk) => chunk?.file || fileById.get(chunk?.fileId) || null; + +const cppChunk = chunks.find((chunk) => resolveChunkFile(chunk) === 'src/sample.cpp' && chunk.name === 'add'); +const swiftChunk = chunks.find((chunk) => resolveChunkFile(chunk) === 'src/sample.swift' && chunk.name === 'greet'); +const pythonChunk = chunks.find((chunk) => resolveChunkFile(chunk) === 'src/sample.py' && chunk.name === 'greet'); + +const hasToolingReturn = (chunk, type) => { + const returns = chunk?.docmeta?.inferredTypes?.returns || []; + return returns.some((entry) => entry?.source === 'tooling' && (!type || entry?.type === type)); +}; +const hasToolingParam = (chunk, name, type) => { + const params = chunk?.docmeta?.inferredTypes?.params || {}; + const entries = params[name] || []; + return entries.some((entry) => entry?.source === 'tooling' && (!type || entry?.type === type)); +}; + +if (!cppChunk) { + console.error('LSP enrichment test failed: missing C++ chunk.'); + process.exit(1); +} +if (!swiftChunk) { + console.error('LSP enrichment test failed: missing Swift chunk.'); + process.exit(1); +} +if (!pythonChunk) { + console.error('LSP enrichment test failed: missing Python chunk.'); + process.exit(1); +} + +if (!hasToolingReturn(cppChunk, 'int')) { + console.error('LSP enrichment test failed: missing tooling return type for C++.'); + process.exit(1); +} +if (!hasToolingParam(cppChunk, 'a', 'int') || !hasToolingParam(cppChunk, 'b', 'int')) { + console.error('LSP enrichment test failed: missing tooling param types for C++.'); + process.exit(1); +} +if (!hasToolingReturn(swiftChunk, 'String')) { + console.error('LSP enrichment test failed: missing tooling return type for Swift.'); + process.exit(1); +} +if (!hasToolingParam(swiftChunk, 'name', 'String') || !hasToolingParam(swiftChunk, 'count', 'Int')) { + console.error('LSP enrichment test failed: missing tooling param types for Swift.'); + process.exit(1); +} +if (!hasToolingReturn(pythonChunk, 'str')) { + console.error('LSP enrichment test failed: missing tooling return type for Python.'); + process.exit(1); +} +if (!hasToolingParam(pythonChunk, 'name', 'str')) { + console.error('LSP enrichment test failed: missing tooling param types for Python.'); + process.exit(1); +} +const pyDiagnostics = pythonChunk.docmeta?.tooling?.diagnostics || []; +if (!pyDiagnostics.some((diag) => diag?.source === 'pyright')) { + console.error('LSP enrichment test failed: missing pyright diagnostics for Python.'); + process.exit(1); +} + +console.log('LSP enrichment test passed'); diff --git a/tests/type-inference-sourcekit-provider-no-sourcekit.js b/tests/type-inference-sourcekit-provider-no-sourcekit.js new file mode 100644 index 000000000..2df087527 --- /dev/null +++ b/tests/type-inference-sourcekit-provider-no-sourcekit.js @@ -0,0 +1,47 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path 
from 'node:path'; +import { collectSourcekitTypes } from '../src/index/tooling/sourcekit-provider.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'sourcekit-provider-no-sourcekit'); +const repoRoot = path.join(tempRoot, 'repo'); +const srcDir = path.join(repoRoot, 'src'); + +await fs.rm(tempRoot, { recursive: true, force: true }); +await fs.mkdir(srcDir, { recursive: true }); +await fs.writeFile( + path.join(srcDir, 'sample.swift'), + 'func greet(name: String) -> String { return "hi \\(name)" }\n' +); + +const chunksByFile = new Map([ + ['src/sample.swift', [{ file: 'src/sample.swift', name: 'greet', start: 0, end: 10, docmeta: {} }]] +]); + +const logs = []; +const log = (msg) => logs.push(String(msg)); + +const result = await collectSourcekitTypes({ + rootDir: repoRoot, + chunksByFile, + log, + cmd: 'sourcekit-lsp-does-not-exist' +}); + +if (!result || !(result.typesByChunk instanceof Map)) { + console.error('sourcekit provider did not return a types map.'); + process.exit(1); +} + +if (result.typesByChunk.size !== 0) { + console.error('sourcekit provider should return empty map when sourcekit-lsp is missing.'); + process.exit(1); +} + +if (!logs.some((entry) => entry.includes('sourcekit-lsp not detected'))) { + console.error('sourcekit provider missing expected fallback log message.'); + process.exit(1); +} + +console.log('sourcekit provider fallback test passed'); diff --git a/tests/type-inference-typescript-provider-no-ts.js b/tests/type-inference-typescript-provider-no-ts.js new file mode 100644 index 000000000..30c5d7b34 --- /dev/null +++ b/tests/type-inference-typescript-provider-no-ts.js @@ -0,0 +1,55 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { collectTypeScriptTypes } from '../src/index/tooling/typescript-provider.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'typescript-provider-no-ts'); +const repoRoot = path.join(tempRoot, 'repo'); +const srcDir = path.join(repoRoot, 'src'); + +await fs.rm(tempRoot, { recursive: true, force: true }); +await fs.mkdir(srcDir, { recursive: true }); +await fs.writeFile( + path.join(srcDir, 'sample.ts'), + 'export function greet(name: string) { return `hi ${name}`; }\n' +); + +const chunksByFile = new Map([ + ['src/sample.ts', [{ file: 'src/sample.ts', name: 'greet', start: 0, end: 10, docmeta: {} }]] +]); + +const logs = []; +const log = (msg) => logs.push(String(msg)); +const toolingConfig = { + dir: path.join(repoRoot, '.tooling'), + typescript: { + enabled: true, + resolveOrder: ['repo'], + useTsconfig: true + } +}; + +const result = await collectTypeScriptTypes({ + rootDir: repoRoot, + chunksByFile, + log, + toolingConfig +}); + +if (!result || !(result.typesByChunk instanceof Map)) { + console.error('TypeScript provider did not return a types map.'); + process.exit(1); +} + +if (result.typesByChunk.size !== 0) { + console.error('TypeScript provider should return empty map when module is missing.'); + process.exit(1); +} + +if (!logs.some((entry) => entry.includes('TypeScript tooling not detected'))) { + console.error('TypeScript provider missing expected fallback log message.'); + process.exit(1); +} + +console.log('TypeScript provider fallback test passed'); diff --git a/tests/typescript-imports-only.js b/tests/typescript-imports-only.js new file mode 100644 index 000000000..eca3da0c1 --- /dev/null +++ b/tests/typescript-imports-only.js @@ -0,0 +1,35 @@ +#!/usr/bin/env node +import { 
collectTypeScriptImports } from '../src/lang/typescript.js'; +import { smartChunk } from '../src/index/chunking.js'; + +const text = "import type { Foo } from 'foo';\nexport = ???"; +let imports = []; +try { + imports = collectTypeScriptImports(text, { + parser: 'babel', + typescript: { importsOnly: true } + }); +} catch (err) { + console.error(`typescript imports-only test failed: ${err?.message || err}`); + process.exit(1); +} + +if (!imports.includes('foo')) { + console.error('typescript imports-only test failed: missing import'); + process.exit(1); +} + +const chunks = smartChunk({ + text: 'export interface Foo { bar: string }', + ext: '.ts', + relPath: 'foo.ts', + mode: 'code', + context: { typescript: { importsOnly: true } } +}); + +if (!Array.isArray(chunks) || chunks.length === 0) { + console.error('typescript imports-only test failed: chunker returned empty.'); + process.exit(1); +} + +console.log('typescript imports-only test passed'); diff --git a/tests/typescript-parser-selection.js b/tests/typescript-parser-selection.js new file mode 100644 index 000000000..d7afd16fd --- /dev/null +++ b/tests/typescript-parser-selection.js @@ -0,0 +1,16 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { buildTypeScriptChunks } from '../src/lang/typescript.js'; + +const sample = 'export function foo(a: number): string { return String(a); }'; + +const heuristicChunks = buildTypeScriptChunks(sample, { parser: 'heuristic' }); +assert.ok(Array.isArray(heuristicChunks) && heuristicChunks.length > 0); + +const babelChunks = buildTypeScriptChunks(sample, { parser: 'babel' }); +assert.ok(Array.isArray(babelChunks) && babelChunks.length > 0); + +const tsChunks = buildTypeScriptChunks(sample, { parser: 'typescript', rootDir: process.cwd() }); +assert.ok(Array.isArray(tsChunks) && tsChunks.length > 0); + +console.log('typescript parser selection test passed'); diff --git a/tests/uv-threadpool-env.js b/tests/uv-threadpool-env.js new file mode 100644 index 000000000..976c2c13d --- /dev/null +++ b/tests/uv-threadpool-env.js @@ -0,0 +1,62 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import fs from 'node:fs'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const cacheRoot = path.join(root, 'tests', '.cache', 'uv-threadpool-env'); +const repoRoot = path.join(cacheRoot, 'repo'); + +await fsPromises.rm(cacheRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify({ runtime: { uvThreadpoolSize: 12 } }, null, 2) +); + +const binPath = path.join(root, 'bin', 'pairofcleats.js'); +if (!fs.existsSync(binPath)) { + console.error(`Missing CLI wrapper: ${binPath}`); + process.exit(1); +} + +const env = { ...process.env }; +delete env.UV_THREADPOOL_SIZE; + +const result = spawnSync(process.execPath, [binPath, 'config', 'dump', '--repo', repoRoot, '--json'], { + encoding: 'utf8', + env +}); + +if (result.status !== 0) { + console.error('config dump failed'); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 
1); +} + +let payload; +try { + payload = JSON.parse(result.stdout || '{}'); +} catch { + console.error('config dump did not output valid JSON'); + process.exit(1); +} + +const runtime = payload?.derived?.runtime; +if (!runtime || typeof runtime !== 'object') { + console.error('config dump payload missing derived.runtime'); + process.exit(1); +} + +if (runtime.uvThreadpoolSize !== 12) { + console.error(`expected runtime.uvThreadpoolSize=12 but got ${runtime.uvThreadpoolSize}`); + process.exit(1); +} + +if (runtime.effectiveUvThreadpoolSize !== 12) { + console.error(`expected runtime.effectiveUvThreadpoolSize=12 but got ${runtime.effectiveUvThreadpoolSize}`); + process.exit(1); +} + +console.log('uv threadpool env test passed'); diff --git a/tests/uv-threadpool-no-override.js b/tests/uv-threadpool-no-override.js new file mode 100644 index 000000000..dc1dae0c2 --- /dev/null +++ b/tests/uv-threadpool-no-override.js @@ -0,0 +1,61 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import fs from 'node:fs'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const cacheRoot = path.join(root, 'tests', '.cache', 'uv-threadpool-no-override'); +const repoRoot = path.join(cacheRoot, 'repo'); + +await fsPromises.rm(cacheRoot, { recursive: true, force: true }); +await fsPromises.mkdir(repoRoot, { recursive: true }); +await fsPromises.writeFile( + path.join(repoRoot, '.pairofcleats.json'), + JSON.stringify({ runtime: { uvThreadpoolSize: 64 } }, null, 2) +); + +const binPath = path.join(root, 'bin', 'pairofcleats.js'); +if (!fs.existsSync(binPath)) { + console.error(`Missing CLI wrapper: ${binPath}`); + process.exit(1); +} + +const env = { ...process.env, UV_THREADPOOL_SIZE: '5' }; + +const result = spawnSync(process.execPath, [binPath, 'config', 'dump', '--repo', repoRoot, '--json'], { + encoding: 'utf8', + env +}); + +if (result.status !== 0) { + console.error('config dump failed'); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.status ?? 
1); +} + +let payload; +try { + payload = JSON.parse(result.stdout || '{}'); +} catch { + console.error('config dump did not output valid JSON'); + process.exit(1); +} + +const runtime = payload?.derived?.runtime; +if (!runtime || typeof runtime !== 'object') { + console.error('config dump payload missing derived.runtime'); + process.exit(1); +} + +if (runtime.uvThreadpoolSize !== 64) { + console.error(`expected runtime.uvThreadpoolSize=64 but got ${runtime.uvThreadpoolSize}`); + process.exit(1); +} + +if (runtime.effectiveUvThreadpoolSize !== 5) { + console.error(`expected runtime.effectiveUvThreadpoolSize=5 but got ${runtime.effectiveUvThreadpoolSize}`); + process.exit(1); +} + +console.log('uv threadpool no-override test passed'); diff --git a/tests/vector-extension-sanitize.js b/tests/vector-extension-sanitize.js new file mode 100644 index 000000000..6ab3fc06b --- /dev/null +++ b/tests/vector-extension-sanitize.js @@ -0,0 +1,33 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { getVectorExtensionConfig } from '../tools/vector-extension.js'; +import { loadUserConfig } from '../tools/dict-utils.js'; + +const root = process.cwd(); +const tempRoot = path.join(root, 'tests', '.cache', 'vector-extension-sanitize'); +await fs.rm(tempRoot, { recursive: true, force: true }); +await fs.mkdir(tempRoot, { recursive: true }); + +const configPath = path.join(tempRoot, '.pairofcleats.json'); +await fs.writeFile(configPath, JSON.stringify({ + sqlite: { + vectorExtension: { + enabled: true, + table: 'dense_vectors_ann; DROP TABLE chunks; --' + } + } +}, null, 2)); + +const userConfig = loadUserConfig(tempRoot); +const config = getVectorExtensionConfig(tempRoot, userConfig); +if (config.enabled) { + console.error('Expected vector extension to be disabled for invalid table name.'); + process.exit(1); +} +if (!config.disabledReason) { + console.error('Expected vector extension disabled reason to be set.'); + process.exit(1); +} + +console.log('vector extension sanitize test passed'); diff --git a/tests/vscode-extension.js b/tests/vscode-extension.js new file mode 100644 index 000000000..ef95b01fa --- /dev/null +++ b/tests/vscode-extension.js @@ -0,0 +1,49 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import path from 'node:path'; + +const root = process.cwd(); +const extensionDir = path.join(root, 'extensions', 'vscode'); +const manifestPath = path.join(extensionDir, 'package.json'); +const entryPath = path.join(extensionDir, 'extension.js'); + +if (!fs.existsSync(manifestPath)) { + console.error('VS Code extension manifest missing.'); + process.exit(1); +} +if (!fs.existsSync(entryPath)) { + console.error('VS Code extension entrypoint missing.'); + process.exit(1); +} + +const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8')); +const activationEvents = new Set(manifest.activationEvents || []); +if (!activationEvents.has('onCommand:pairofcleats.search')) { + console.error('VS Code extension activation event missing.'); + process.exit(1); +} + +const commands = manifest.contributes?.commands || []; +const commandIds = new Set(commands.map((cmd) => cmd.command)); +if (!commandIds.has('pairofcleats.search')) { + console.error('VS Code extension command missing.'); + process.exit(1); +} + +const configProps = manifest.contributes?.configuration?.properties || {}; +const requiredProps = [ + 'pairofcleats.cliPath', + 'pairofcleats.cliArgs', + 'pairofcleats.searchMode', + 'pairofcleats.searchBackend', + 'pairofcleats.searchAnn', + 
'pairofcleats.maxResults' +]; +for (const prop of requiredProps) { + if (!configProps[prop]) { + console.error(`VS Code extension config missing ${prop}.`); + process.exit(1); + } +} + +console.log('VS Code extension tests passed'); diff --git a/tests/watch-backend-selection.js b/tests/watch-backend-selection.js new file mode 100644 index 000000000..4b382be30 --- /dev/null +++ b/tests/watch-backend-selection.js @@ -0,0 +1,27 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import { getCapabilities } from '../src/shared/capabilities.js'; +import { resolveWatcherBackend } from '../src/index/build/watch.js'; + +const runtime = { userConfig: {}, argv: {} }; +const caps = getCapabilities({ refresh: true }); + +process.env.PAIROFCLEATS_WATCHER_BACKEND = 'chokidar'; +const forcedChokidar = resolveWatcherBackend({ runtime, pollMs: 0 }); +assert.equal(forcedChokidar.resolved, 'chokidar', 'forced chokidar should resolve to chokidar'); + +process.env.PAIROFCLEATS_WATCHER_BACKEND = 'parcel'; +const forcedParcel = resolveWatcherBackend({ runtime, pollMs: 0 }); +if (caps.watcher.parcel) { + assert.equal(forcedParcel.resolved, 'parcel', 'parcel should resolve when available'); +} else { + assert.equal(forcedParcel.resolved, 'chokidar', 'parcel should fall back when unavailable'); + assert.ok(forcedParcel.warning, 'fallback should include warning'); +} + +const pollFallback = resolveWatcherBackend({ runtime, pollMs: 500 }); +assert.equal(pollFallback.resolved, 'chokidar', 'polling forces chokidar'); + +delete process.env.PAIROFCLEATS_WATCHER_BACKEND; + +console.log('watch backend selection tests passed'); diff --git a/tests/watch-debounce.js b/tests/watch-debounce.js new file mode 100644 index 000000000..80eb87e3c --- /dev/null +++ b/tests/watch-debounce.js @@ -0,0 +1,53 @@ +import assert from 'node:assert/strict'; +import { createDebouncedScheduler } from '../src/index/build/watch.js'; + +const wait = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + +let calls = 0; +const scheduler = createDebouncedScheduler({ + debounceMs: 30, + onRun: () => { + calls += 1; + } +}); + +scheduler.schedule(); +scheduler.schedule(); +await wait(10); +scheduler.schedule(); +await wait(60); +assert.equal(calls, 1, 'expected single debounced run'); + +scheduler.schedule(); +await wait(50); +assert.equal(calls, 2, 'expected second run after debounce'); + + + +let errorCalls = 0; +let unhandledRejections = 0; +const onUnhandled = () => { + unhandledRejections += 1; +}; +process.on('unhandledRejection', onUnhandled); + +const asyncScheduler = createDebouncedScheduler({ + debounceMs: 10, + onRun: async () => { + throw new Error('boom'); + }, + onError: () => { + errorCalls += 1; + } +}); + +asyncScheduler.schedule(); +await wait(40); +asyncScheduler.cancel(); + +process.removeListener('unhandledRejection', onUnhandled); + +assert.equal(errorCalls, 1, 'expected onError to be invoked for async rejection'); +assert.equal(unhandledRejections, 0, 'expected no unhandledRejection events'); + +console.log('watch debounce test passed'); diff --git a/tests/watch-filter.js b/tests/watch-filter.js new file mode 100644 index 000000000..9d7e70061 --- /dev/null +++ b/tests/watch-filter.js @@ -0,0 +1,78 @@ +import assert from 'node:assert/strict'; +import path from 'node:path'; +import ignore from 'ignore'; +import { isIndexablePath } from '../src/index/build/watch.js'; + +const root = path.join(process.cwd(), 'tests', 'fixtures', 'sample'); +const ignoreMatcher = ignore().add(['ignored/']); + +const jsPath = 
path.join(root, 'src', 'app.js'); +assert.equal( + isIndexablePath({ absPath: jsPath, root, ignoreMatcher, modes: ['code'] }), + true, + 'expected code extension to be indexable for code mode' +); + +const mdPath = path.join(root, 'docs', 'readme.md'); +assert.equal( + isIndexablePath({ absPath: mdPath, root, ignoreMatcher, modes: ['prose'] }), + true, + 'expected prose extension to be indexable for prose mode' +); + +assert.equal( + isIndexablePath({ absPath: mdPath, root, ignoreMatcher, modes: ['code'] }), + false, + 'expected prose extension to be excluded for code-only mode' +); + +const dockerfilePath = path.join(root, 'Dockerfile'); +assert.equal( + isIndexablePath({ absPath: dockerfilePath, root, ignoreMatcher, modes: ['code'] }), + true, + 'expected special code filename to be indexable for code mode' +); + +const dockerfileVariantPath = path.join(root, 'Dockerfile.dev'); +assert.equal( + isIndexablePath({ absPath: dockerfileVariantPath, root, ignoreMatcher, modes: ['code'] }), + true, + 'expected dockerfile variants to be indexable for code mode' +); + +const makefileVariantPath = path.join(root, 'Makefile.in'); +assert.equal( + isIndexablePath({ absPath: makefileVariantPath, root, ignoreMatcher, modes: ['code'] }), + true, + 'expected makefile variants to be indexable for code mode' +); + +const gnuMakefilePath = path.join(root, 'GNUmakefile'); +assert.equal( + isIndexablePath({ absPath: gnuMakefilePath, root, ignoreMatcher, modes: ['code'] }), + true, + 'expected GNUmakefile to be indexable for code mode' +); + +const ignoredPath = path.join(root, 'ignored', 'app.js'); +assert.equal( + isIndexablePath({ absPath: ignoredPath, root, ignoreMatcher, modes: ['code'] }), + false, + 'expected ignored path to be excluded' +); + +const outsidePath = path.join(root, '..', 'outside', 'file.js'); +assert.equal( + isIndexablePath({ absPath: outsidePath, root, ignoreMatcher, modes: ['code'] }), + false, + 'expected path outside root to be excluded' +); + +const mixedModesPath = path.join(root, 'content', 'story.md'); +assert.equal( + isIndexablePath({ absPath: mixedModesPath, root, ignoreMatcher, modes: ['code', 'prose'] }), + true, + 'expected prose extension to be indexable when prose mode is enabled' +); + +console.log('watch filter test passed'); diff --git a/tests/watch-stability-guard.js b/tests/watch-stability-guard.js new file mode 100644 index 000000000..e2bd66bfb --- /dev/null +++ b/tests/watch-stability-guard.js @@ -0,0 +1,27 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fs from 'node:fs/promises'; +import path from 'node:path'; +import os from 'node:os'; +import { waitForStableFile } from '../src/index/build/watch.js'; + +const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'pairofcleats-watch-')); +const filePath = path.join(tempRoot, 'partial.txt'); + +await fs.writeFile(filePath, 'start'); + +const appendPromise = new Promise((resolve) => { + setTimeout(() => { + void fs.appendFile(filePath, 'more').then(resolve); + }, 50); +}); + +const started = Date.now(); +const stable = await waitForStableFile(filePath, { checks: 3, intervalMs: 100 }); +const elapsed = Date.now() - started; +await appendPromise; + +assert.equal(stable, true, 'stability guard should eventually resolve true'); +assert.ok(elapsed >= 150, 'stability guard should wait for file to settle'); + +console.log('watch stability guard tests passed'); diff --git a/tests/worker-pool-windows.js b/tests/worker-pool-windows.js new file mode 100644 index 000000000..e4fab604a --- /dev/null +++ 
b/tests/worker-pool-windows.js @@ -0,0 +1,102 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { normalizePostingsConfig } from '../src/shared/postings-config.js'; +import { createTokenizationContext, tokenizeChunkText } from '../src/index/build/tokenization.js'; +import { createIndexerWorkerPool, normalizeWorkerPoolConfig } from '../src/index/build/worker-pool.js'; + +if (process.platform !== 'win32') { + console.log('worker pool windows test skipped (non-windows).'); + process.exit(0); +} + +const root = path.resolve('tests', '.cache', 'worker-pool-windows'); +const deepDir = path.join(root, 'space dir', 'unicode-é', 'deep', 'path', 'more'); +await fs.mkdir(deepDir, { recursive: true }); + +const originalCwd = process.cwd(); +try { + process.chdir(deepDir); + const postingsConfig = normalizePostingsConfig({ + enablePhraseNgrams: true, + phraseMinN: 2, + phraseMaxN: 3, + enableChargrams: true, + chargramMinN: 3, + chargramMaxN: 3 + }); + const dictWords = new Set(['hello', 'world', 'foo', 'bar']); + const dictConfig = { segmentation: 'greedy' }; + const workerConfig = normalizeWorkerPoolConfig({ + enabled: true, + maxWorkers: 1, + maxFileBytes: 4096, + quantizeBatchSize: 2, + taskTimeoutMs: 5000 + }, { cpuLimit: 1 }); + + const workerPool = await createIndexerWorkerPool({ + config: workerConfig, + dictWords, + dictConfig, + postingsConfig + }); + if (!workerPool) { + console.log('worker pool windows test skipped (worker pool unavailable).'); + process.exit(0); + } + + const context = createTokenizationContext({ dictWords, dictConfig, postingsConfig }); + const sample = 'helloWorld fooBar'; + const syncTokens = tokenizeChunkText({ text: sample, mode: 'code', ext: '.js', context }); + + const runs = []; + for (let i = 0; i < 50; i += 1) { + runs.push(workerPool.runTokenize({ + text: sample, + mode: 'code', + ext: '.js', + file: `task-${i}`, + size: sample.length + })); + } + const results = await Promise.all(runs); + for (const result of results) { + if (!result) { + console.error('worker pool windows test failed: missing token result.'); + process.exit(1); + } + if (JSON.stringify(syncTokens.tokens) !== JSON.stringify(result.tokens)) { + console.error('worker pool windows test failed: tokens mismatch.'); + process.exit(1); + } + } + + if (workerPool.pool?.destroy) { + await workerPool.pool.destroy(); + await workerPool.runTokenize({ + text: sample, + mode: 'code', + ext: '.js', + file: 'restart', + size: sample.length + }); + await new Promise((resolve) => setTimeout(resolve, 1200)); + const restarted = await workerPool.runTokenize({ + text: sample, + mode: 'code', + ext: '.js', + file: 'restart-2', + size: sample.length + }); + if (!restarted) { + console.error('worker pool windows test failed: restart did not recover.'); + process.exit(1); + } + } + + await workerPool.destroy(); + console.log('worker pool windows test passed'); +} finally { + process.chdir(originalCwd); +} diff --git a/tests/worker-pool.js b/tests/worker-pool.js new file mode 100644 index 000000000..4a1873355 --- /dev/null +++ b/tests/worker-pool.js @@ -0,0 +1,74 @@ +#!/usr/bin/env node +import { normalizePostingsConfig } from '../src/shared/postings-config.js'; +import { quantizeVec } from '../src/index/embedding.js'; +import { createTokenizationContext, tokenizeChunkText } from '../src/index/build/tokenization.js'; +import { createIndexerWorkerPool, normalizeWorkerPoolConfig } from '../src/index/build/worker-pool.js'; + +const postingsConfig = normalizePostingsConfig({ 
+ enablePhraseNgrams: true, + phraseMinN: 2, + phraseMaxN: 3, + enableChargrams: true, + chargramMinN: 3, + chargramMaxN: 3 +}); +const dictWords = new Set(['hello', 'world', 'foo', 'bar']); +const dictConfig = { segmentation: 'greedy' }; +const workerConfig = normalizeWorkerPoolConfig({ + enabled: true, + maxWorkers: 1, + maxFileBytes: 4096, + quantizeBatchSize: 2, + taskTimeoutMs: 5000 +}, { cpuLimit: 1 }); + +const workerPool = await createIndexerWorkerPool({ + config: workerConfig, + dictWords, + dictConfig, + postingsConfig +}); +if (!workerPool) { + console.log('worker pool test skipped (worker pool unavailable).'); + process.exit(0); +} + +const context = createTokenizationContext({ dictWords, dictConfig, postingsConfig }); +const sample = 'helloWorld fooBar'; +const syncTokens = tokenizeChunkText({ text: sample, mode: 'code', ext: '.js', context }); +const workerTokens = await workerPool.runTokenize({ text: sample, mode: 'code', ext: '.js' }); + +if (JSON.stringify(syncTokens.tokens) !== JSON.stringify(workerTokens.tokens)) { + console.error('worker pool test failed: tokens mismatch.'); + process.exit(1); +} +if (JSON.stringify(syncTokens.seq) !== JSON.stringify(workerTokens.seq)) { + console.error('worker pool test failed: seq mismatch.'); + process.exit(1); +} +if (JSON.stringify(syncTokens.ngrams) !== JSON.stringify(workerTokens.ngrams)) { + console.error('worker pool test failed: ngrams mismatch.'); + process.exit(1); +} +if (JSON.stringify(syncTokens.chargrams) !== JSON.stringify(workerTokens.chargrams)) { + console.error('worker pool test failed: chargrams mismatch.'); + process.exit(1); +} +if (JSON.stringify(syncTokens.minhashSig) !== JSON.stringify(workerTokens.minhashSig)) { + console.error('worker pool test failed: minhash mismatch.'); + process.exit(1); +} + +const vectors = [ + [0, 0.5], + [1, -1] +]; +const syncQuant = vectors.map((vec) => quantizeVec(vec)); +const workerQuant = await workerPool.runQuantize({ vectors }); +if (JSON.stringify(syncQuant) !== JSON.stringify(workerQuant)) { + console.error('worker pool test failed: quantize mismatch.'); + process.exit(1); +} + +await workerPool.destroy(); +console.log('worker pool test passed'); diff --git a/tests/xxhash-backends.js b/tests/xxhash-backends.js new file mode 100644 index 000000000..8f5fd4db1 --- /dev/null +++ b/tests/xxhash-backends.js @@ -0,0 +1,30 @@ +#!/usr/bin/env node +import assert from 'node:assert/strict'; +import fs from 'node:fs/promises'; +import path from 'node:path'; +import os from 'node:os'; +import { getCapabilities } from '../src/shared/capabilities.js'; +import { checksumFile, checksumString, setXxhashBackend } from '../src/shared/hash.js'; + +const baseline = '44bc2cf5ad770999'; + +setXxhashBackend('wasm'); +const wasmHash = await checksumString('abc'); +assert.equal(wasmHash.value, baseline, 'wasm checksumString should match baseline'); + +const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), 'pairofcleats-xxhash-')); +const filePath = path.join(tempRoot, 'sample.txt'); +await fs.writeFile(filePath, 'abc'); +const fileHash = await checksumFile(filePath); +assert.equal(fileHash.value, baseline, 'checksumFile should match checksumString'); + +const caps = getCapabilities(); +if (caps.hash.nodeRsXxhash) { + setXxhashBackend('native'); + const nativeHash = await checksumString('abc'); + assert.equal(nativeHash.value, baseline, 'native checksumString should match baseline'); +} + +setXxhashBackend(''); + +console.log('xxhash backend tests passed'); diff --git a/tools/api-server.js 
b/tools/api-server.js new file mode 100644 index 000000000..5abe7efba --- /dev/null +++ b/tools/api-server.js @@ -0,0 +1,72 @@ +#!/usr/bin/env node +import http from 'node:http'; +import path from 'node:path'; +import { createCli } from '../src/shared/cli.js'; +import { getRuntimeConfig, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; +import { getMetricsRegistry } from '../src/shared/metrics.js'; +import { createApiRouter } from './api/router.js'; +import { configureServiceLogger } from './service/logger.js'; + +const argv = createCli({ + scriptName: 'api-server', + options: { + host: { type: 'string', default: '127.0.0.1' }, + port: { type: 'string', default: '7345' }, + output: { type: 'string', default: 'compact' }, + json: { type: 'boolean', default: false }, + quiet: { type: 'boolean', default: false }, + repo: { type: 'string' } + } +}).parse(); + +const host = argv.host || '127.0.0.1'; +const port = Number.isFinite(Number(argv.port)) ? Number(argv.port) : 7345; +const defaultRepo = argv.repo ? path.resolve(argv.repo) : resolveRepoRoot(process.cwd()); +const jsonOutput = argv.json === true; +const quiet = argv.quiet === true; +const userConfig = loadUserConfig(defaultRepo); +const runtimeConfig = getRuntimeConfig(defaultRepo, userConfig); +const parsedUv = Number(process.env.UV_THREADPOOL_SIZE); +const effectiveUvThreadpoolSize = Number.isFinite(parsedUv) && parsedUv > 0 ? Math.floor(parsedUv) : null; + +const metricsRegistry = getMetricsRegistry(); +const { logLine } = configureServiceLogger({ repoRoot: defaultRepo, service: 'api' }); + +const log = (message) => { + if (quiet) return; + logLine(message); +}; + +const router = createApiRouter({ + host, + defaultRepo, + defaultOutput: argv.output, + metricsRegistry +}); + +const server = http.createServer(router.handleRequest); + +server.listen({ port, host }, () => { + const address = server.address(); + const actualPort = typeof address === 'object' && address ? address.port : port; + const baseUrl = `http://${host}:${actualPort}`; + if (jsonOutput) { + console.log(JSON.stringify({ ok: true, host, port: actualPort, repo: defaultRepo, baseUrl })); + } else { + log(`[api] listening at ${baseUrl}`); + log(`[api] repo root: ${defaultRepo}`); + log(`[api] UV_THREADPOOL_SIZE: ${effectiveUvThreadpoolSize ?? 'default'} (config=${runtimeConfig.uvThreadpoolSize ?? 'none'})`); + } +}); + +const shutdown = (signal) => { + log(`[api] ${signal} received; shutting down...`); + server.close(() => { + router.close(); + log('[api] shutdown complete.'); + process.exit(0); + }); +}; + +process.on('SIGINT', () => shutdown('SIGINT')); +process.on('SIGTERM', () => shutdown('SIGTERM')); diff --git a/tools/api/response.js b/tools/api/response.js new file mode 100644 index 000000000..e102c8b00 --- /dev/null +++ b/tools/api/response.js @@ -0,0 +1,28 @@ +/** + * Write a JSON payload to the HTTP response. + * @param {import('node:http').ServerResponse} res + * @param {number} statusCode + * @param {any} payload + */ +export const sendJson = (res, statusCode, payload) => { + const body = JSON.stringify(payload); + res.writeHead(statusCode, { + 'Content-Type': 'application/json; charset=utf-8', + 'Content-Length': Buffer.byteLength(body), + 'Access-Control-Allow-Origin': '*' + }); + res.end(body); +}; + +/** + * Write an error payload to the HTTP response. 
+ * @param {import('node:http').ServerResponse} res
+ * @param {number} statusCode
+ * @param {string} code
+ * @param {string} message
+ * @param {object} [details]
+ */
+export const sendError = (res, statusCode, code, message, details = {}) => {
+  const { code: ignored, ...rest } = details || {}; // strip any caller-supplied "code" so the explicit argument wins
+  sendJson(res, statusCode, { ok: false, code, message, ...rest });
+};
diff --git a/tools/api/router.js b/tools/api/router.js
new file mode 100644
index 000000000..721dd9fc4
--- /dev/null
+++ b/tools/api/router.js
@@ -0,0 +1,438 @@
+import fs from 'node:fs';
+import path from 'node:path';
+import { resolveRepoRoot } from '../dict-utils.js';
+import { search, status } from '../../src/integrations/core/index.js';
+import { createSqliteDbCache } from '../../src/retrieval/sqlite-cache.js';
+import { createSearchValidator, normalizeMetaFilters } from './validation.js';
+import { sendError, sendJson } from './response.js';
+import { ERROR_CODES } from '../../src/shared/error-codes.js';
+import { createSseResponder } from './sse.js';
+
+/**
+ * Create an API router for the HTTP server.
+ * @param {{host:string,defaultRepo:string,defaultOutput:string,metricsRegistry:any}} config
+ */
+export const createApiRouter = ({ host, defaultRepo, defaultOutput, metricsRegistry }) => {
+  const validateSearchPayload = createSearchValidator();
+  const repoCaches = new Map();
+
+  const getRepoCaches = (repoPath) => {
+    const key = repoPath || defaultRepo;
+    const existing = repoCaches.get(key);
+    if (existing) {
+      existing.lastUsed = Date.now();
+      return existing;
+    }
+    const entry = {
+      indexCache: new Map(),
+      sqliteCache: createSqliteDbCache(),
+      lastUsed: Date.now()
+    };
+    repoCaches.set(key, entry);
+    return entry;
+  };
+
+  const closeRepoCaches = () => {
+    for (const entry of repoCaches.values()) {
+      entry.sqliteCache?.closeAll?.();
+    }
+    repoCaches.clear();
+  };
+
+  /**
+   * Parse a JSON request body.
+   * @param {import('node:http').IncomingMessage} req
+   * @returns {Promise<string>} Raw request body text.
+   */
+  const parseBody = (req) => new Promise((resolve, reject) => {
+    let data = '';
+    req.on('data', (chunk) => {
+      data += chunk;
+      if (data.length > 1_000_000) {
+        reject(new Error('Request body too large.'));
+        req.destroy();
+      }
+    });
+    req.on('aborted', () => reject(new Error('Request aborted.')));
+    req.on('end', () => resolve(data));
+    req.on('error', reject);
+  });
+
+  /**
+   * Resolve and validate a repo path.
+   * @param {string|null|undefined} value
+   * @returns {string}
+   */
+  const resolveRepo = (value) => {
+    const candidate = value ? path.resolve(value) : defaultRepo;
+    if (!fs.existsSync(candidate)) {
+      throw new Error(`Repo path not found: ${candidate}`);
+    }
+    if (!fs.statSync(candidate).isDirectory()) {
+      throw new Error(`Repo path is not a directory: ${candidate}`);
+    }
+    return value ? resolveRepoRoot(candidate) : candidate;
+  };
+
+  /**
+   * Build CLI search arguments from a request payload.
+   * @param {string} repoPath
+   * @param {any} payload
+   * @returns {{ok:boolean,message?:string,args?:string[],query?:string}}
+   */
+  const buildSearchParams = (repoPath, payload) => {
+    const query = payload?.query ? String(payload.query) : '';
+    if (!query) {
+      return { ok: false, message: 'Missing query.' };
+    }
+    const output = payload?.output || defaultOutput;
+    const useCompact = output !== 'full' && output !== 'json';
+    const searchArgs = [useCompact ? '--json-compact' : '--json', '--repo', repoPath];
+    const mode = payload?.mode ? String(payload.mode) : null;
+    const backend = payload?.backend ?
String(payload.backend) : null; + const ann = payload?.ann; + const top = Number.isFinite(Number(payload?.top)) ? Number(payload.top) : null; + const context = Number.isFinite(Number(payload?.context)) ? Number(payload.context) : null; + const typeFilter = payload?.type ? String(payload.type) : null; + const authorFilter = payload?.author ? String(payload.author) : null; + const importFilter = payload?.import ? String(payload.import) : null; + const callsFilter = payload?.calls ? String(payload.calls) : null; + const usesFilter = payload?.uses ? String(payload.uses) : null; + const signatureFilter = payload?.signature ? String(payload.signature) : null; + const paramFilter = payload?.param ? String(payload.param) : null; + const decoratorFilter = payload?.decorator ? String(payload.decorator) : null; + const inferredTypeFilter = payload?.inferredType ? String(payload.inferredType) : null; + const returnTypeFilter = payload?.returnType ? String(payload.returnType) : null; + const throwsFilter = payload?.throws ? String(payload.throws) : null; + const readsFilter = payload?.reads ? String(payload.reads) : null; + const writesFilter = payload?.writes ? String(payload.writes) : null; + const mutatesFilter = payload?.mutates ? String(payload.mutates) : null; + const aliasFilter = payload?.alias ? String(payload.alias) : null; + const awaitsFilter = payload?.awaits ? String(payload.awaits) : null; + const riskFilter = payload?.risk ? String(payload.risk) : null; + const riskTagFilter = payload?.riskTag ? String(payload.riskTag) : null; + const riskSourceFilter = payload?.riskSource ? String(payload.riskSource) : null; + const riskSinkFilter = payload?.riskSink ? String(payload.riskSink) : null; + const riskCategoryFilter = payload?.riskCategory ? String(payload.riskCategory) : null; + const riskFlowFilter = payload?.riskFlow ? String(payload.riskFlow) : null; + const branchesMin = Number.isFinite(Number(payload?.branchesMin)) ? Number(payload.branchesMin) : null; + const loopsMin = Number.isFinite(Number(payload?.loopsMin)) ? Number(payload.loopsMin) : null; + const breaksMin = Number.isFinite(Number(payload?.breaksMin)) ? Number(payload.breaksMin) : null; + const continuesMin = Number.isFinite(Number(payload?.continuesMin)) ? Number(payload.continuesMin) : null; + const churnMin = Number.isFinite(Number(payload?.churnMin)) ? Number(payload.churnMin) : null; + const chunkAuthorFilter = payload?.chunkAuthor ? String(payload.chunkAuthor) : null; + const modifiedAfter = payload?.modifiedAfter ? String(payload.modifiedAfter) : null; + const modifiedSince = Number.isFinite(Number(payload?.modifiedSince)) ? Number(payload.modifiedSince) : null; + const visibilityFilter = payload?.visibility ? String(payload.visibility) : null; + const extendsFilter = payload?.extends ? String(payload.extends) : null; + const lintFilter = payload?.lint === true; + const asyncFilter = payload?.async === true; + const generatorFilter = payload?.generator === true; + const returnsFilter = payload?.returns === true; + const branchFilter = payload?.branch ? String(payload.branch) : null; + const langFilter = payload?.lang ? String(payload.lang) : null; + const caseAll = payload?.case === true; + const caseFile = payload?.caseFile === true || caseAll; + const caseTokens = payload?.caseTokens === true || caseAll; + const fileFilters = []; + const toList = (value) => (Array.isArray(value) ? value : (value == null ? 
[] : [value])); + fileFilters.push(...toList(payload?.path)); + fileFilters.push(...toList(payload?.file)); + const extFilters = toList(payload?.ext); + const metaFilters = normalizeMetaFilters(payload?.meta); + const metaJson = payload?.metaJson || null; + + if (mode && mode !== 'both') searchArgs.push('--mode', mode); + if (backend) searchArgs.push('--backend', backend); + if (ann === true) searchArgs.push('--ann'); + if (ann === false) searchArgs.push('--no-ann'); + if (top) searchArgs.push('-n', String(top)); + if (context !== null) searchArgs.push('--context', String(context)); + if (typeFilter) searchArgs.push('--type', typeFilter); + if (authorFilter) searchArgs.push('--author', authorFilter); + if (importFilter) searchArgs.push('--import', importFilter); + if (callsFilter) searchArgs.push('--calls', callsFilter); + if (usesFilter) searchArgs.push('--uses', usesFilter); + if (signatureFilter) searchArgs.push('--signature', signatureFilter); + if (paramFilter) searchArgs.push('--param', paramFilter); + if (decoratorFilter) searchArgs.push('--decorator', decoratorFilter); + if (inferredTypeFilter) searchArgs.push('--inferred-type', inferredTypeFilter); + if (returnTypeFilter) searchArgs.push('--return-type', returnTypeFilter); + if (throwsFilter) searchArgs.push('--throws', throwsFilter); + if (readsFilter) searchArgs.push('--reads', readsFilter); + if (writesFilter) searchArgs.push('--writes', writesFilter); + if (mutatesFilter) searchArgs.push('--mutates', mutatesFilter); + if (aliasFilter) searchArgs.push('--alias', aliasFilter); + if (awaitsFilter) searchArgs.push('--awaits', awaitsFilter); + if (riskFilter) searchArgs.push('--risk', riskFilter); + if (riskTagFilter) searchArgs.push('--risk-tag', riskTagFilter); + if (riskSourceFilter) searchArgs.push('--risk-source', riskSourceFilter); + if (riskSinkFilter) searchArgs.push('--risk-sink', riskSinkFilter); + if (riskCategoryFilter) searchArgs.push('--risk-category', riskCategoryFilter); + if (riskFlowFilter) searchArgs.push('--risk-flow', riskFlowFilter); + if (branchesMin !== null) searchArgs.push('--branches', String(branchesMin)); + if (loopsMin !== null) searchArgs.push('--loops', String(loopsMin)); + if (breaksMin !== null) searchArgs.push('--breaks', String(breaksMin)); + if (continuesMin !== null) searchArgs.push('--continues', String(continuesMin)); + if (churnMin !== null) searchArgs.push('--churn', String(churnMin)); + if (chunkAuthorFilter) searchArgs.push('--chunk-author', chunkAuthorFilter); + if (modifiedAfter) searchArgs.push('--modified-after', modifiedAfter); + if (modifiedSince !== null) searchArgs.push('--modified-since', String(modifiedSince)); + if (visibilityFilter) searchArgs.push('--visibility', visibilityFilter); + if (extendsFilter) searchArgs.push('--extends', extendsFilter); + if (lintFilter) searchArgs.push('--lint'); + if (asyncFilter) searchArgs.push('--async'); + if (generatorFilter) searchArgs.push('--generator'); + if (returnsFilter) searchArgs.push('--returns'); + if (branchFilter) searchArgs.push('--branch', branchFilter); + if (langFilter) searchArgs.push('--lang', langFilter); + if (caseAll) searchArgs.push('--case'); + if (!caseAll && caseFile) searchArgs.push('--case-file'); + if (!caseAll && caseTokens) searchArgs.push('--case-tokens'); + for (const entry of fileFilters) { + if (entry == null || entry === '') continue; + searchArgs.push('--path', String(entry)); + } + for (const entry of extFilters) { + if (entry == null || entry === '') continue; + searchArgs.push('--ext', String(entry)); + 
} + if (Array.isArray(metaFilters)) { + metaFilters.forEach((entry) => searchArgs.push('--meta', entry)); + } + if (metaJson) { + const jsonValue = typeof metaJson === 'string' ? metaJson : JSON.stringify(metaJson); + searchArgs.push('--meta-json', jsonValue); + } + + return { ok: true, args: searchArgs, query }; + }; + + const isNoIndexError = (err) => { + if (err?.code === ERROR_CODES.NO_INDEX) return true; + const message = String(err?.message || err || '').toLowerCase(); + if (!message) return false; + return message.includes('index not found') + || message.includes('missing required tables') + || message.includes('missing sqlite index') + || message.includes('missing lmdb index') + || message.includes('sqlite backend requested but index not found') + || message.includes('lmdb backend requested but index not found'); + }; + + const handleRequest = async (req, res) => { + const requestUrl = new URL(req.url || '/', `http://${host}`); + res.setHeader('Access-Control-Allow-Origin', '*'); + res.setHeader('Access-Control-Allow-Methods', 'GET,POST,OPTIONS'); + res.setHeader('Access-Control-Allow-Headers', 'Content-Type'); + if (req.method === 'OPTIONS') { + res.writeHead(204); + res.end(); + return; + } + + if (requestUrl.pathname === '/health' && req.method === 'GET') { + sendJson(res, 200, { ok: true, uptimeMs: Math.round(process.uptime() * 1000) }); + return; + } + + if (requestUrl.pathname === '/metrics' && req.method === 'GET') { + try { + const body = await metricsRegistry.metrics(); + res.writeHead(200, { + 'Content-Type': metricsRegistry.contentType || 'text/plain; version=0.0.4; charset=utf-8', + 'Access-Control-Allow-Origin': '*' + }); + res.end(body); + } catch (err) { + sendError(res, 500, ERROR_CODES.INTERNAL, 'Failed to render metrics.', { + error: err?.message || String(err) + }); + } + return; + } + + if (requestUrl.pathname === '/status/stream' && req.method === 'GET') { + const sse = createSseResponder(req, res); + let repoPath = ''; + try { + repoPath = resolveRepo(requestUrl.searchParams.get('repo')); + } catch (err) { + await sse.sendHeaders(); + await sse.sendEvent('error', { + ok: false, + code: ERROR_CODES.INVALID_REQUEST, + message: err?.message || 'Invalid repo path.' + }); + await sse.sendEvent('done', { ok: false }); + sse.end(); + return; + } + await sse.sendHeaders(); + await sse.sendEvent('start', { ok: true, repo: repoPath }); + try { + const payload = await status(repoPath); + if (!sse.isClosed()) { + await sse.sendEvent('result', { ok: true, repo: repoPath, status: payload }); + await sse.sendEvent('done', { ok: true }); + } + } catch (err) { + await sse.sendEvent('error', { + ok: false, + code: ERROR_CODES.INTERNAL, + message: err?.message || 'Failed to collect status.' 
+ }); + await sse.sendEvent('done', { ok: false }); + } + sse.end(); + return; + } + + if (requestUrl.pathname === '/status' && req.method === 'GET') { + let repoPath = ''; + try { + repoPath = resolveRepo(requestUrl.searchParams.get('repo')); + } catch (err) { + sendError(res, 400, ERROR_CODES.INVALID_REQUEST, err?.message || 'Invalid repo path.'); + return; + } + try { + const payload = await status(repoPath); + sendJson(res, 200, { ok: true, repo: repoPath, status: payload }); + } catch (err) { + sendError(res, 500, ERROR_CODES.INTERNAL, 'Failed to collect status.', { + error: err?.message || String(err) + }); + } + return; + } + + if (requestUrl.pathname === '/search/stream' && req.method === 'POST') { + const sse = createSseResponder(req, res); + let raw; + try { + raw = await parseBody(req); + } catch (err) { + sendError(res, 413, ERROR_CODES.INVALID_REQUEST, err?.message || 'Request body too large.'); + return; + } + let payload = null; + try { + payload = raw ? JSON.parse(raw) : null; + } catch { + sendError(res, 400, ERROR_CODES.INVALID_REQUEST, 'Invalid JSON payload.'); + return; + } + const validation = validateSearchPayload(payload); + if (!validation.ok) { + sendError(res, 400, ERROR_CODES.INVALID_REQUEST, 'Invalid search payload.', { + errors: validation.errors + }); + return; + } + let repoPath = ''; + try { + repoPath = resolveRepo(payload?.repoPath || payload?.repo); + } catch (err) { + sendError(res, 400, ERROR_CODES.INVALID_REQUEST, err?.message || 'Invalid repo path.'); + return; + } + const searchParams = buildSearchParams(repoPath, payload || {}); + if (!searchParams.ok) { + sendError(res, 400, ERROR_CODES.INVALID_REQUEST, searchParams.message || 'Invalid search payload.'); + return; + } + await sse.sendHeaders(); + await sse.sendEvent('start', { ok: true }); + const caches = getRepoCaches(repoPath); + try { + const body = await search(repoPath, { + args: searchParams.args, + query: searchParams.query, + emitOutput: false, + exitOnError: false, + indexCache: caches.indexCache, + sqliteCache: caches.sqliteCache + }); + if (!sse.isClosed()) { + await sse.sendEvent('result', { ok: true, repo: repoPath, result: body }); + await sse.sendEvent('done', { ok: true }); + } + } catch (err) { + const isNoIndex = isNoIndexError(err); + await sse.sendEvent('error', { + ok: false, + code: isNoIndex ? ERROR_CODES.NO_INDEX : ERROR_CODES.INTERNAL, + message: err?.message || 'Search failed.' + }); + await sse.sendEvent('done', { ok: false }); + } + sse.end(); + return; + } + + if (requestUrl.pathname === '/search' && req.method === 'POST') { + let raw; + try { + raw = await parseBody(req); + } catch (err) { + sendError(res, 413, ERROR_CODES.INVALID_REQUEST, err?.message || 'Request body too large.'); + return; + } + let payload = null; + try { + payload = raw ? 
JSON.parse(raw) : null; + } catch { + sendError(res, 400, ERROR_CODES.INVALID_REQUEST, 'Invalid JSON payload.'); + return; + } + const validation = validateSearchPayload(payload); + if (!validation.ok) { + sendError(res, 400, ERROR_CODES.INVALID_REQUEST, 'Invalid search payload.', { + errors: validation.errors + }); + return; + } + let repoPath = ''; + try { + repoPath = resolveRepo(payload?.repoPath || payload?.repo); + } catch (err) { + sendError(res, 400, ERROR_CODES.INVALID_REQUEST, err?.message || 'Invalid repo path.'); + return; + } + const searchParams = buildSearchParams(repoPath, payload || {}); + if (!searchParams.ok) { + sendError(res, 400, ERROR_CODES.INVALID_REQUEST, searchParams.message || 'Invalid search payload.'); + return; + } + try { + const caches = getRepoCaches(repoPath); + const body = await search(repoPath, { + args: searchParams.args, + query: searchParams.query, + emitOutput: false, + exitOnError: false, + indexCache: caches.indexCache, + sqliteCache: caches.sqliteCache + }); + sendJson(res, 200, { ok: true, repo: repoPath, result: body }); + } catch (err) { + if (isNoIndexError(err)) { + sendError(res, 409, ERROR_CODES.NO_INDEX, err?.message || 'Index not found.', { + error: err?.message || String(err) + }); + return; + } + sendError(res, 500, ERROR_CODES.INTERNAL, 'Search failed.', { error: err?.message || String(err) }); + } + return; + } + + sendError(res, 404, ERROR_CODES.NOT_FOUND, 'Not found.'); + }; + + return { + handleRequest, + close: closeRepoCaches + }; +}; diff --git a/tools/api/sse.js b/tools/api/sse.js new file mode 100644 index 000000000..5bd1de716 --- /dev/null +++ b/tools/api/sse.js @@ -0,0 +1,49 @@ +/** + * Write SSE headers for streaming responses. + * @param {import('node:http').IncomingMessage} req + * @param {import('node:http').ServerResponse} res + */ +export const createSseResponder = (req, res) => { + let closed = false; + const markClosed = () => { + closed = true; + }; + req.on('aborted', markClosed); + res.on('close', markClosed); + res.on('finish', markClosed); + res.on('error', markClosed); + const writeChunk = async (chunk) => { + if (closed || res.writableEnded || res.destroyed) return false; + if (!res.write(chunk)) { + await new Promise((resolve) => res.once('drain', resolve)); + if (closed || res.writableEnded || res.destroyed) return false; + } + return true; + }; + return { + sendHeaders() { + if (closed || res.headersSent) return false; + res.writeHead(200, { + 'Content-Type': 'text/event-stream; charset=utf-8', + 'Cache-Control': 'no-cache', + Connection: 'keep-alive', + 'Access-Control-Allow-Origin': '*' + }); + return writeChunk('\n'); + }, + async sendEvent(event, payload) { + if (closed || res.writableEnded || res.destroyed) return false; + const ok = await writeChunk(`event: ${event}\n`); + if (!ok) return false; + return writeChunk(`data: ${JSON.stringify(payload)}\n\n`); + }, + end() { + if (closed || res.writableEnded || res.destroyed) return; + res.end(); + closed = true; + }, + isClosed() { + return closed || res.writableEnded || res.destroyed; + } + }; +}; diff --git a/tools/api/validation.js b/tools/api/validation.js new file mode 100644 index 000000000..b4386114a --- /dev/null +++ b/tools/api/validation.js @@ -0,0 +1,142 @@ +import Ajv from 'ajv'; + +const stringListSchema = { + anyOf: [ + { type: 'string' }, + { type: 'array', items: { type: 'string' } } + ] +}; + +const metaSchema = { + anyOf: [ + { type: 'string' }, + { + type: 'array', + items: { + anyOf: [ + { type: 'string' }, + { type: 'number' }, + { 
type: 'boolean' }, + { type: 'object' }, + { type: 'null' } + ] + } + }, + { type: 'object', additionalProperties: true } + ] +}; + +const searchRequestSchema = { + type: 'object', + additionalProperties: false, + required: ['query'], + properties: { + query: { type: 'string', minLength: 1 }, + repoPath: { type: 'string' }, + repo: { type: 'string' }, + output: { type: 'string', enum: ['compact', 'json', 'full'] }, + mode: { type: 'string', enum: ['code', 'prose', 'records', 'both', 'all', 'extracted-prose'] }, + backend: { type: 'string', enum: ['auto', 'memory', 'sqlite', 'sqlite-fts', 'lmdb'] }, + ann: { type: 'boolean' }, + top: { type: 'integer', minimum: 0 }, + context: { type: 'integer', minimum: 0 }, + type: { type: 'string' }, + author: { type: 'string' }, + import: { type: 'string' }, + calls: { type: 'string' }, + uses: { type: 'string' }, + signature: { type: 'string' }, + param: { type: 'string' }, + decorator: { type: 'string' }, + inferredType: { type: 'string' }, + returnType: { type: 'string' }, + throws: { type: 'string' }, + reads: { type: 'string' }, + writes: { type: 'string' }, + mutates: { type: 'string' }, + alias: { type: 'string' }, + awaits: { type: 'string' }, + risk: { type: 'string' }, + riskTag: { type: 'string' }, + riskSource: { type: 'string' }, + riskSink: { type: 'string' }, + riskCategory: { type: 'string' }, + riskFlow: { type: 'string' }, + branchesMin: { type: 'integer', minimum: 0 }, + loopsMin: { type: 'integer', minimum: 0 }, + breaksMin: { type: 'integer', minimum: 0 }, + continuesMin: { type: 'integer', minimum: 0 }, + churnMin: { type: 'integer', minimum: 0 }, + chunkAuthor: { type: 'string' }, + modifiedAfter: { type: 'string' }, + modifiedSince: { type: 'integer', minimum: 0 }, + visibility: { type: 'string' }, + extends: { type: 'string' }, + lint: { type: 'boolean' }, + async: { type: 'boolean' }, + generator: { type: 'boolean' }, + returns: { type: 'boolean' }, + branch: { type: 'string' }, + lang: { type: 'string' }, + case: { type: 'boolean' }, + caseFile: { type: 'boolean' }, + caseTokens: { type: 'boolean' }, + path: stringListSchema, + file: stringListSchema, + ext: stringListSchema, + meta: metaSchema, + metaJson: { + type: ['string', 'object', 'array', 'number', 'boolean', 'null'] + } + } +}; + +const formatValidationErrors = (errors = []) => errors.map((err) => { + const path = err.instancePath || '#'; + if (err.keyword === 'additionalProperties') { + return `${path} has unknown field "${err.params?.additionalProperty}"`; + } + if (err.keyword === 'required') { + return `${path} missing required field "${err.params?.missingProperty}"`; + } + return `${path} ${err.message}`.trim(); +}); + +/** + * Normalize meta filters into CLI-friendly key/value strings. + * @param {any} meta + * @returns {string[]|null} + */ +export const normalizeMetaFilters = (meta) => { + if (!meta) return null; + if (Array.isArray(meta)) { + const entries = meta.flatMap((entry) => { + if (entry == null) return []; + if (typeof entry === 'string') return [entry]; + if (typeof entry === 'object') { + return Object.entries(entry).map(([key, value]) => + value == null || value === '' ? String(key) : `${key}=${value}` + ); + } + return [String(entry)]; + }); + return entries.length ? entries : null; + } + if (typeof meta === 'object') { + const entries = Object.entries(meta).map(([key, value]) => + value == null || value === '' ? String(key) : `${key}=${value}` + ); + return entries.length ? 
entries : null; + } + return [String(meta)]; +}; + +export const createSearchValidator = () => { + const ajv = new Ajv({ allErrors: true, strict: false }); + const validateSearchRequest = ajv.compile(searchRequestSchema); + return (payload) => { + const valid = validateSearchRequest(payload); + if (valid) return { ok: true }; + return { ok: false, errors: formatValidationErrors(validateSearchRequest.errors || []) }; + }; +}; diff --git a/tools/assemble-pieces.js b/tools/assemble-pieces.js new file mode 100644 index 000000000..4d8ba5600 --- /dev/null +++ b/tools/assemble-pieces.js @@ -0,0 +1,72 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import path from 'node:path'; +import { createCli } from '../src/shared/cli.js'; +import { assembleIndexPieces } from '../src/index/build/piece-assembly.js'; +import { loadUserConfig, resolveRepoRoot } from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'assemble-pieces', + argv: ['node', 'assemble-pieces.js', ...process.argv.slice(2)], + options: { + input: { type: 'string', array: true, describe: 'Input index directory (repeatable)' }, + inputs: { type: 'string', describe: 'Comma-separated input index directories' }, + out: { type: 'string', demandOption: true, describe: 'Output index directory' }, + mode: { type: 'string', default: 'code' }, + repo: { type: 'string' }, + stage: { type: 'string' }, + force: { type: 'boolean', default: false }, + sort: { + type: 'boolean', + default: true, + describe: 'Sort input directories for deterministic assembly (disable with --no-sort)' + } + } +}).parse(); + +const inputDirs = []; +if (Array.isArray(argv.input)) { + inputDirs.push(...argv.input.filter(Boolean)); +} +if (typeof argv.inputs === 'string') { + inputDirs.push(...argv.inputs.split(',').map((entry) => entry.trim()).filter(Boolean)); +} +if (!inputDirs.length) { + console.error('assemble-pieces requires at least one --input or --inputs entry.'); + process.exit(1); +} + +const outDir = path.resolve(argv.out); +if (fsSync.existsSync(outDir) && !argv.force) { + const entries = fsSync.readdirSync(outDir); + if (entries.length) { + console.error(`assemble-pieces output directory is not empty: ${outDir}`); + console.error('Pass --force to reuse the directory.'); + process.exit(1); + } +} +await fs.mkdir(outDir, { recursive: true }); + +const repoRoot = argv.repo ? path.resolve(argv.repo) : resolveRepoRoot(process.cwd()); +const userConfig = loadUserConfig(repoRoot); +const mode = argv.mode || 'code'; + +const resolvedInputs = inputDirs.map((dir) => path.resolve(dir)); +if (argv.sort !== false) { + resolvedInputs.sort((a, b) => (a < b ? -1 : (a > b ? 
1 : 0))); +} + +try { + await assembleIndexPieces({ + inputs: resolvedInputs, + outDir, + root: repoRoot, + mode, + userConfig, + stage: argv.stage + }); +} catch (err) { + console.error(err?.message || err); + process.exit(1); +} diff --git a/tools/bench-dict-seg.js b/tools/bench-dict-seg.js new file mode 100644 index 000000000..045b4b610 --- /dev/null +++ b/tools/bench-dict-seg.js @@ -0,0 +1,187 @@ +#!/usr/bin/env node +import fsSync from 'node:fs'; +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { createCli } from '../src/shared/cli.js'; +import { splitWordsWithDict } from '../src/shared/tokenize.js'; + +const argv = createCli({ + scriptName: 'bench-dict-seg', + options: { + json: { type: 'boolean', default: false }, + dict: { type: 'string' }, + tokens: { type: 'string' }, + fixture: { type: 'string' }, + out: { type: 'string' }, + sample: { type: 'number' }, + 'dp-max': { type: 'number' } + } +}).parse(); + +const root = process.cwd(); +const fixtureArg = typeof argv.fixture === 'string' ? argv.fixture.trim() : ''; +const fixtureDir = fixtureArg + ? (path.isAbsolute(fixtureArg) + ? path.resolve(fixtureArg) + : path.join(root, 'tests', 'fixtures', fixtureArg)) + : null; +const dictPath = fixtureDir + ? path.join(fixtureDir, 'words.txt') + : path.resolve(argv.dict || path.join(root, 'tests', 'fixtures', 'dicts', 'words.txt')); +const tokensPath = fixtureDir + ? path.join(fixtureDir, 'tokens.txt') + : (argv.tokens ? path.resolve(argv.tokens) : null); +const fixtureLabel = fixtureDir ? path.basename(fixtureDir) : 'default'; +const sampleLimit = Number.isFinite(Number(argv.sample)) + ? Math.max(10, Number(argv.sample)) + : 300; +const dpMaxTokenLength = Number.isFinite(Number(argv['dp-max'])) + ? Math.max(4, Math.floor(Number(argv['dp-max']))) + : 32; + +function camelize(a, b) { + if (!a) return b || ''; + if (!b) return a; + return `${a}${b[0].toUpperCase()}${b.slice(1)}`; +} + +function buildTokenSamples(words, limit) { + const base = words.slice(0, Math.min(words.length, 120)); + const tokens = new Set(); + for (const word of base) tokens.add(word); + for (let i = 0; i < base.length; i += 1) { + const a = base[i]; + const b = base[(i + 1) % base.length]; + const c = base[(i + 2) % base.length]; + tokens.add(`${a}${b}`); + tokens.add(camelize(a, b)); + tokens.add(`${a}_${b}`); + tokens.add(`${a}-${c}`); + } + const extras = [ + 'HTTPRequest', + 'getUserProfile', + 'userIDLookup', + 'kubernetesClusterConfig', + 'postgresConnectionString', + 'lruCacheStats', + 'xkcdToken', + 'xyzzynotaword', + 'foo2bar', + 'ZalgoMode' + ]; + extras.forEach((token) => tokens.add(token)); + return Array.from(tokens).slice(0, limit); +} + +async function loadTokens(words) { + if (tokensPath) { + try { + const raw = await fs.readFile(tokensPath, 'utf8'); + return raw + .split(/\r?\n/) + .map((line) => line.trim()) + .filter(Boolean) + .slice(0, sampleLimit); + } catch { + // Fall back to generated samples when fixture tokens are missing. 
+ } + } + return buildTokenSamples(words, sampleLimit); +} + +function measure(tokens, dict, segmentation) { + const start = Date.now(); + let totalSegments = 0; + let totalChars = 0; + let dictChars = 0; + let unknownChars = 0; + let dictSegments = 0; + let unknownSegments = 0; + for (const token of tokens) { + if (!token) continue; + totalChars += token.length; + const segments = splitWordsWithDict(token.toLowerCase(), dict, { + segmentation, + dpMaxTokenLength + }); + totalSegments += segments.length; + for (const seg of segments) { + if (dict.has(seg)) { + dictChars += seg.length; + dictSegments += 1; + } else { + unknownChars += seg.length; + unknownSegments += 1; + } + } + } + const durationMs = Date.now() - start; + const coverage = totalChars > 0 ? dictChars / totalChars : 0; + return { + segments: totalSegments, + avgSegmentsPerToken: tokens.length ? totalSegments / tokens.length : 0, + dictSegments, + unknownSegments, + dictChars, + unknownChars, + coverage, + durationMs + }; +} + +let dictRaw = ''; +try { + dictRaw = await fs.readFile(dictPath, 'utf8'); +} catch (err) { + console.error(`Failed to read dictionary at ${dictPath}`); + if (err?.message) console.error(err.message); + process.exit(1); +} + +const dictWords = new Set( + dictRaw + .split(/\r?\n/) + .map((line) => line.trim().toLowerCase()) + .filter(Boolean) +); + +const tokens = await loadTokens(Array.from(dictWords)); +const greedy = measure(tokens, dictWords, 'greedy'); +const dp = measure(tokens, dictWords, 'dp'); +const aho = measure(tokens, dictWords, 'aho'); + +const summary = { + generatedAt: new Date().toISOString(), + dictPath, + tokensPath: tokensPath && fsSync.existsSync(tokensPath) ? tokensPath : null, + fixture: fixtureLabel, + dictWords: dictWords.size, + tokens: tokens.length, + dpMaxTokenLength, + strategies: { + greedy, + dp, + aho + } +}; + +if (argv.out) { + const outPath = path.resolve(argv.out); + await fs.writeFile(outPath, JSON.stringify(summary, null, 2)); +} + +if (argv.json) { + console.log(JSON.stringify(summary, null, 2)); +} else { + console.log('Dictionary segmentation benchmark'); + console.log(`- Dict: ${dictPath}`); + console.log(`- Words: ${dictWords.size}`); + console.log(`- Tokens: ${tokens.length}`); + console.log(`- dpMaxTokenLength: ${dpMaxTokenLength}`); + for (const [name, stats] of Object.entries(summary.strategies)) { + console.log(`- ${name} avg segments: ${stats.avgSegmentsPerToken.toFixed(2)}`); + console.log(`- ${name} coverage: ${(stats.coverage * 100).toFixed(1)}%`); + console.log(`- ${name} duration: ${stats.durationMs} ms`); + } +} diff --git a/tools/bench-language-matrix.js b/tools/bench-language-matrix.js new file mode 100644 index 000000000..56344a8bb --- /dev/null +++ b/tools/bench-language-matrix.js @@ -0,0 +1,224 @@ +#!/usr/bin/env node +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { execa } from 'execa'; +import { createCli } from '../src/shared/cli.js'; +import { BENCH_OPTIONS, mergeCliOptions, validateBenchArgs } from '../src/shared/cli-options.js'; +import { resolveToolRoot } from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'bench-language-matrix', + options: mergeCliOptions( + BENCH_OPTIONS, + { + tier: { type: 'string', default: 'typical' }, + backend: { type: 'string' }, + backends: { type: 'string' }, + 'ann-modes': { type: 'string' }, + 'fts-profiles': { type: 'string' }, + config: { type: 'string' }, + root: { type: 'string' }, + 'cache-root': { type: 'string' }, + 'cache-suffix': { type: 'string' }, 
+ results: { type: 'string' }, + 'log-dir': { type: 'string' }, + 'out-dir': { type: 'string' }, + language: { type: 'string' }, + languages: { type: 'string' }, + repos: { type: 'string' }, + only: { type: 'string' }, + 'fts-weights': { type: 'string' }, + 'dry-run': { type: 'boolean', default: false }, + 'fail-fast': { type: 'boolean', default: false }, + 'lock-mode': { type: 'string' }, + 'lock-wait-ms': { type: 'number' }, + 'lock-stale-ms': { type: 'number' } + } + ) +}).parse(); +validateBenchArgs(argv); + +const scriptRoot = resolveToolRoot(); +const benchScript = path.join(scriptRoot, 'tools', 'bench-language-repos.js'); +const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); +const resultsRoot = path.resolve(argv.results || path.join(scriptRoot, 'benchmarks', 'results')); +const runRoot = path.resolve(argv['out-dir'] || path.join(resultsRoot, 'matrix', timestamp)); +const logRoot = path.resolve(argv['log-dir'] || path.join(runRoot, 'logs')); +const outRoot = path.join(runRoot, 'runs'); + +const ALL_BACKENDS = ['sqlite-fts', 'sqlite', 'memory']; +const DEFAULT_ANN_MODES = ['auto', 'on', 'off']; +const DEFAULT_FTS_PROFILES = ['balanced', 'headline', 'name']; + +const parseList = (value) => { + if (!value) return []; + return String(value) + .split(',') + .map((entry) => entry.trim()) + .filter(Boolean); +}; + +const normalizeBackend = (raw) => { + const value = String(raw || '').toLowerCase(); + if (value === 'fts') return 'sqlite-fts'; + return value; +}; + +const resolveBackends = () => { + const raw = argv.backends || argv.backend || ''; + const list = parseList(raw).map(normalizeBackend).filter(Boolean); + if (!list.length || list.includes('all')) return ALL_BACKENDS.slice(); + return list; +}; + +const resolveAnnModes = () => { + const list = parseList(argv['ann-modes']).map((entry) => entry.toLowerCase()); + return list.length ? list : DEFAULT_ANN_MODES.slice(); +}; + +const resolveFtsProfiles = () => { + const list = parseList(argv['fts-profiles']).map((entry) => entry.toLowerCase()); + return list.length ? list : DEFAULT_FTS_PROFILES.slice(); +}; + +const toSafeName = (value) => String(value || '') + .replace(/[^a-z0-9-_]+/gi, '_') + .replace(/^_+|_+$/g, '') + .toLowerCase(); + +const buildConfigs = () => { + const configs = []; + const backends = resolveBackends(); + const annModes = resolveAnnModes(); + const ftsProfiles = resolveFtsProfiles(); + for (const backend of backends) { + const usesFts = backend === 'sqlite-fts' || backend === 'fts'; + const profiles = usesFts ? 
ftsProfiles : [null];
+    for (const annMode of annModes) {
+      for (const profile of profiles) {
+        const idParts = [backend, annMode];
+        if (profile) idParts.push(profile);
+        const id = toSafeName(idParts.join('-'));
+        configs.push({
+          id,
+          backend,
+          annMode,
+          ftsProfile: profile
+        });
+      }
+    }
+  }
+  return configs;
+};
+
+const appendArgs = (args, flag, value) => {
+  if (value === undefined || value === null || value === '') return;
+  args.push(flag, String(value));
+};
+
+const configToArgs = (config, outFile, logFile) => {
+  const args = [benchScript];
+  const tierArg = argv.tier || 'typical';
+  appendArgs(args, '--tier', tierArg);
+  appendArgs(args, '--backend', config.backend);
+  appendArgs(args, '--out', outFile);
+  appendArgs(args, '--log', logFile);
+
+  if (config.annMode === 'on') args.push('--ann');
+  if (config.annMode === 'off') args.push('--no-ann');
+  if (config.ftsProfile) appendArgs(args, '--fts-profile', config.ftsProfile);
+
+  if (argv.build) args.push('--build');
+  if (argv['build-index']) args.push('--build-index');
+  if (argv['build-sqlite']) args.push('--build-sqlite');
+  if (argv.incremental) args.push('--incremental');
+  if (argv['stub-embeddings']) args.push('--stub-embeddings');
+  if (argv['real-embeddings']) args.push('--real-embeddings');
+  if (argv['dry-run']) args.push('--dry-run');
+
+  appendArgs(args, '--config', argv.config);
+  appendArgs(args, '--root', argv.root);
+  appendArgs(args, '--cache-root', argv['cache-root']);
+  appendArgs(args, '--cache-suffix', argv['cache-suffix']);
+  appendArgs(args, '--results', argv.results);
+  appendArgs(args, '--index-profile', argv['index-profile']);
+  if (argv['no-index-profile']) args.push('--no-index-profile');
+  appendArgs(args, '--language', argv.language);
+  appendArgs(args, '--languages', argv.languages);
+  appendArgs(args, '--repos', argv.repos);
+  appendArgs(args, '--only', argv.only);
+  appendArgs(args, '--queries', argv.queries);
+  appendArgs(args, '--top', argv.top);
+  appendArgs(args, '--limit', argv.limit);
+  appendArgs(args, '--bm25-k1', argv['bm25-k1']);
+  appendArgs(args, '--bm25-b', argv['bm25-b']);
+  appendArgs(args, '--fts-weights', argv['fts-weights']);
+  appendArgs(args, '--threads', argv.threads);
+  appendArgs(args, '--heap-mb', argv['heap-mb']);
+  appendArgs(args, '--lock-mode', argv['lock-mode']);
+  appendArgs(args, '--lock-wait-ms', argv['lock-wait-ms']);
+  appendArgs(args, '--lock-stale-ms', argv['lock-stale-ms']);
+
+  return args;
+};
+
+async function main() {
+  await fsPromises.mkdir(logRoot, { recursive: true });
+  await fsPromises.mkdir(outRoot, { recursive: true });
+
+  const configs = buildConfigs();
+  if (!configs.length) {
+    console.error('No benchmark configurations resolved.');
+    process.exit(1);
+  }
+
+  const results = [];
+  for (const config of configs) {
+    const label = `${config.backend}/${config.annMode}${config.ftsProfile ? `/${config.ftsProfile}` : ''}`;
+    const outFile = path.join(outRoot, `${config.id}.json`);
+    const logFile = path.join(logRoot, `${config.id}.log`);
+    const args = configToArgs(config, outFile, logFile);
+
+    console.log(`\n[bench-matrix] ${label}`);
+    console.log(`node ${args.map((arg) => (arg.includes(' ') ? `"${arg}"` : arg)).join(' ')}`);
+
+    if (argv['dry-run']) {
+      results.push({ ...config, outFile, logFile, status: 'dry-run' });
+      continue;
+    }
+
+    try {
+      const child = execa(process.execPath, args, { stdio: 'inherit' });
+      await child;
+      results.push({ ...config, outFile, logFile, status: 'ok' });
+    } catch (err) {
+      results.push({
+        ...config,
+        outFile,
+        logFile,
+        status: 'failed',
+        exitCode: err?.exitCode ?? null,
+        error: err?.message || String(err)
+      });
+      if (argv['fail-fast']) break;
+    }
+  }
+
+  const summary = {
+    generatedAt: new Date().toISOString(),
+    runRoot,
+    outRoot,
+    logRoot,
+    tier: argv.tier,
+    results
+  };
+  const summaryPath = path.join(runRoot, 'matrix.json');
+  await fsPromises.writeFile(summaryPath, JSON.stringify(summary, null, 2));
+  console.log(`\n[bench-matrix] Summary written to ${summaryPath}`);
+}
+
+main().catch((err) => {
+  console.error(err?.message || err);
+  process.exit(1);
+});
diff --git a/tools/bench-language-repos.js b/tools/bench-language-repos.js
new file mode 100644
index 000000000..1184c9f89
--- /dev/null
+++ b/tools/bench-language-repos.js
@@ -0,0 +1,519 @@
+#!/usr/bin/env node
+import fs from 'node:fs';
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import { getEnvConfig } from '../src/shared/env.js';
+import { getRuntimeConfig, loadUserConfig, resolveRuntimeEnv } from './dict-utils.js';
+import { parseBenchLanguageArgs } from './bench/language/cli.js';
+import { loadBenchConfig } from './bench/language/config.js';
+import { checkIndexLock, formatLockDetail } from './bench/language/locks.js';
+import {
+  ensureLongPathsSupport,
+  needsIndexArtifacts,
+  needsSqliteArtifacts,
+  resolveCloneTool,
+  resolveRepoCacheRoot,
+  resolveRepoDir
+} from './bench/language/repos.js';
+import { createProcessRunner } from './bench/language/process.js';
+import {
+  buildLineStats,
+  formatDuration,
+  formatGb,
+  formatMetricSummary,
+  getRecommendedHeapMb,
+  stripMaxOldSpaceFlag,
+  validateEncodingFixtures
+} from './bench/language/metrics.js';
+import { buildReportOutput, printSummary } from './bench/language/report.js';
+import { createProgressState } from './bench/language/progress/state.js';
+import { createProgressRenderer } from './bench/language/progress/render.js';
+
+const parseList = (value) => {
+  if (!value) return [];
+  return String(value)
+    .split(',')
+    .map((entry) => entry.trim())
+    .filter(Boolean);
+};
+
+const {
+  argv,
+  scriptRoot,
+  configPath,
+  reposRoot,
+  cacheRoot,
+  resultsRoot,
+  logPath,
+  cloneEnabled,
+  dryRun,
+  quietMode,
+  interactive,
+  colorEnabled,
+  logWindowSize,
+  lockMode,
+  lockWaitMs,
+  lockStaleMs,
+  backendList,
+  wantsSqlite,
+  indexProfile,
+  suppressProfileEnv
+} = parseBenchLanguageArgs();
+
+const baseEnv = { ...process.env };
+const envConfig = getEnvConfig();
+const heapArgRaw = argv['heap-mb'];
+const heapArg = Number.isFinite(Number(heapArgRaw)) ?
Math.floor(Number(heapArgRaw)) : null; +const heapRecommendation = getRecommendedHeapMb(); +let heapLogged = false; + +let logStream = null; +const initLog = () => { + if (logStream) return; + fs.mkdirSync(path.dirname(logPath), { recursive: true }); + logStream = fs.createWriteStream(logPath, { flags: 'a' }); + logStream.write(`\n=== Bench run ${new Date().toISOString()} ===\n`); + logStream.write(`Config: ${configPath}\n`); + logStream.write(`Repos: ${reposRoot}\n`); + logStream.write(`Cache: ${cacheRoot}\n`); + logStream.write(`Results: ${resultsRoot}\n`); +}; + +const writeLog = (line) => { + if (!logStream) initLog(); + if (!logStream) return; + logStream.write(`${line}\n`); +}; + +const writeLogSync = (line) => { + try { + fs.mkdirSync(path.dirname(logPath), { recursive: true }); + fs.appendFileSync(logPath, `${line}\n`); + } catch {} +}; + +const progressState = createProgressState({ logWindowSize }); +let processRunner = null; +const progress = createProgressRenderer({ + state: progressState, + interactive, + quietMode, + colorEnabled, + writeLog, + getActiveLabel: () => (processRunner ? processRunner.getActiveLabel() : '') +}); +processRunner = createProcessRunner({ + appendLog: progress.appendLog, + writeLog, + writeLogSync, + logHistory: progressState.logHistory, + logPath +}); + +process.on('exit', (code) => { + processRunner.logExit('exit', code); + if (logStream) logStream.end(); +}); +process.on('SIGINT', () => { + writeLogSync('[signal] SIGINT received'); + const active = processRunner.getActiveChild(); + if (active) { + writeLogSync(`[signal] terminating ${processRunner.getActiveLabel()}`); + processRunner.killProcessTree(active.pid); + } + processRunner.logExit('SIGINT', 130); + process.exit(130); +}); +process.on('SIGTERM', () => { + writeLogSync('[signal] SIGTERM received'); + const active = processRunner.getActiveChild(); + if (active) { + writeLogSync(`[signal] terminating ${processRunner.getActiveLabel()}`); + processRunner.killProcessTree(active.pid); + } + processRunner.logExit('SIGTERM', 143); + process.exit(143); +}); +process.on('uncaughtException', (err) => { + writeLogSync(`[error] uncaughtException: ${err?.stack || err}`); + processRunner.logExit('uncaughtException', 1); + process.exit(1); +}); +process.on('unhandledRejection', (err) => { + writeLogSync(`[error] unhandledRejection: ${err?.stack || err}`); + processRunner.logExit('unhandledRejection', 1); + process.exit(1); +}); + +const config = loadBenchConfig(configPath); +await validateEncodingFixtures(scriptRoot); +const languageFilter = parseList(argv.languages || argv.language).map((entry) => entry.toLowerCase()); +let tierFilter = parseList(argv.tier).map((entry) => entry.toLowerCase()); +const repoFilter = parseList(argv.only || argv.repos).map((entry) => entry.toLowerCase()); +if (!tierFilter.length && Array.isArray(argv._) && argv._.length) { + const positionalTiers = argv._ + .map((entry) => String(entry).toLowerCase()) + .filter((entry) => entry === 'large' || entry === 'typical' || entry === 'small' || entry === 'tiny'); + if (positionalTiers.length) tierFilter = positionalTiers; +} + +const tasks = []; +for (const [language, entry] of Object.entries(config)) { + if (languageFilter.length && !languageFilter.includes(language.toLowerCase())) continue; + const queriesPath = argv.queries + ? 
path.resolve(argv.queries) + : path.resolve(scriptRoot, entry.queries || ''); + if (!fs.existsSync(queriesPath)) { + console.error(`Missing queries file: ${queriesPath}`); + process.exit(1); + } + const repoGroups = entry.repos || {}; + for (const [tier, repos] of Object.entries(repoGroups)) { + if (tierFilter.length && !tierFilter.includes(tier.toLowerCase())) continue; + for (const repo of repos) { + if (repoFilter.length && !repoFilter.includes(repo.toLowerCase())) continue; + tasks.push({ language, label: entry.label || language, tier, repo, queriesPath }); + } + } +} + +if (argv.list) { + const payload = { + config: configPath, + repoRoot: reposRoot, + cacheRoot, + resultsRoot, + languages: Object.keys(config), + tasks + }; + if (argv.json) { + console.log(JSON.stringify(payload, null, 2)); + } else { + console.log('Benchmark targets'); + console.log(`- config: ${configPath}`); + console.log(`- repos: ${reposRoot}`); + console.log(`- cache: ${cacheRoot}`); + console.log(`- results: ${resultsRoot}`); + for (const task of tasks) { + console.log(`- ${task.language} ${task.tier} ${task.repo}`); + } + } + process.exit(0); +} + +if (!tasks.length) { + console.error('No benchmark targets match the requested filters.'); + process.exit(1); +} + +let cloneTool = null; +if (cloneEnabled && !dryRun) { + ensureLongPathsSupport(); + cloneTool = resolveCloneTool(); + if (!quietMode) console.log(`Clone tool: ${cloneTool.label}`); +} +await fsPromises.mkdir(reposRoot, { recursive: true }); +await fsPromises.mkdir(resultsRoot, { recursive: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); +initLog(); +writeLog(`Clone tool: ${cloneTool ? cloneTool.label : 'disabled'}`); + +const benchScript = path.join(scriptRoot, 'tests', 'bench.js'); +const results = []; +const startTime = Date.now(); +let completed = 0; + +progress.updateMetrics('Metrics: pending'); +progress.updateProgress(`Progress: 0/${tasks.length} | elapsed ${formatDuration(0)}`); + +for (const task of tasks) { + const repoPath = resolveRepoDir({ reposRoot, repo: task.repo, language: task.language }); + await fsPromises.mkdir(path.dirname(repoPath), { recursive: true }); + const repoLabel = `${task.language}/${task.repo}`; + const phaseLabel = `repo ${repoLabel} (${task.tier})`; + progressState.currentRepoLabel = repoLabel; + progress.resetBuildProgress(repoLabel); + + if (!fs.existsSync(repoPath)) { + if (!cloneEnabled && !dryRun) { + console.error(`Missing repo ${task.repo} at ${repoPath}. Re-run with --clone.`); + process.exit(1); + } + progress.updateProgress(`Progress: ${completed}/${tasks.length} | cloning ${phaseLabel} | elapsed ${formatDuration(Date.now() - startTime)}`); + if (!dryRun && cloneEnabled && cloneTool) { + const args = cloneTool.buildArgs(task.repo, repoPath); + const cloneResult = await processRunner.runProcess(`clone ${task.repo}`, cloneTool.label, args, { + env: { ...process.env, GIT_TERMINAL_PROMPT: '0' }, + continueOnError: true + }); + if (!cloneResult.ok) { + progress.appendLog(`[error] Clone failed for ${repoLabel}; continuing to next repo.`); + completed += 1; + progress.updateProgress(`Progress: ${completed}/${tasks.length} | failed ${phaseLabel} | elapsed ${formatDuration(Date.now() - startTime)}`); + progress.updateMetrics('Metrics: failed (clone)'); + results.push({ + ...task, + repoPath, + outFile: null, + summary: null, + failed: true, + failureReason: 'clone', + failureCode: cloneResult.code ?? 
null + }); + continue; + } + } + } + + const repoUserConfig = loadUserConfig( + repoPath, + indexProfile ? { profile: indexProfile } : {} + ); + const repoRuntimeConfig = getRuntimeConfig(repoPath, repoUserConfig); + let baseNodeOptions = baseEnv.NODE_OPTIONS || ''; + if (Number.isFinite(heapArg) && heapArg > 0) { + baseNodeOptions = stripMaxOldSpaceFlag(baseNodeOptions); + } + const hasHeapFlag = baseNodeOptions.includes('--max-old-space-size'); + let heapOverride = null; + if (Number.isFinite(heapArg) && heapArg > 0) { + heapOverride = heapArg; + if (!heapLogged) { + progress.appendLog(`[heap] Using ${formatGb(heapOverride)} (${heapOverride} MB) from --heap-mb.`); + heapLogged = true; + } + } else if ( + !Number.isFinite(repoRuntimeConfig.maxOldSpaceMb) + && !envConfig.maxOldSpaceMb + && !hasHeapFlag + ) { + heapOverride = heapRecommendation.recommendedMb; + if (!heapLogged) { + progress.appendLog( + `[auto-heap] Using ${formatGb(heapOverride)} (${heapOverride} MB) for Node heap. ` + + 'Override with --heap-mb or PAIROFCLEATS_MAX_OLD_SPACE_MB.' + ); + heapLogged = true; + } + } + const runtimeConfigForRun = heapOverride + ? { ...repoRuntimeConfig, maxOldSpaceMb: heapOverride } + : repoRuntimeConfig; + const baseEnvForRun = { ...baseEnv }; + if (baseNodeOptions) { + baseEnvForRun.NODE_OPTIONS = baseNodeOptions; + } else { + delete baseEnvForRun.NODE_OPTIONS; + } + const repoEnvBase = resolveRuntimeEnv(runtimeConfigForRun, baseEnvForRun); + if (suppressProfileEnv && repoEnvBase.PAIROFCLEATS_PROFILE) { + delete repoEnvBase.PAIROFCLEATS_PROFILE; + } + if (heapOverride) { + repoEnvBase.PAIROFCLEATS_MAX_OLD_SPACE_MB = String(heapOverride); + } + if (indexProfile) { + repoEnvBase.PAIROFCLEATS_PROFILE = indexProfile; + } + + const outDir = path.join(resultsRoot, task.language); + const outFile = path.join(outDir, `${task.repo.replace('/', '__')}.json`); + await fsPromises.mkdir(outDir, { recursive: true }); + + const repoCacheRoot = resolveRepoCacheRoot({ repoPath, cacheRoot }); + const wantsMemory = backendList.includes('memory'); + const missingIndex = needsIndexArtifacts(repoPath); + const missingSqlite = wantsSqlite && needsSqliteArtifacts(repoPath); + let autoBuildIndex = false; + let autoBuildSqlite = false; + const buildIndexRequested = argv.build || argv['build-index']; + const buildSqliteRequested = argv.build || argv['build-sqlite']; + if (buildSqliteRequested && !buildIndexRequested && missingIndex) { + autoBuildIndex = true; + progress.appendLog('[auto-build] sqlite build requires index artifacts; enabling build-index.'); + } + if (!argv.build && !argv['build-index'] && !argv['build-sqlite']) { + if (missingIndex && wantsMemory) autoBuildIndex = true; + if (missingSqlite) autoBuildSqlite = true; + if (autoBuildSqlite && missingIndex) autoBuildIndex = true; + if (autoBuildIndex || autoBuildSqlite) { + progress.appendLog( + `[auto-build] missing artifacts${autoBuildIndex ? ' index' : ''}${autoBuildSqlite ? 
' sqlite' : ''}; enabling build.` + ); + } + } + + const shouldBuildIndex = argv.build || argv['build-index'] || autoBuildIndex; + if (shouldBuildIndex && !dryRun) { + try { + progress.appendLog(`[metrics] Collecting line counts for ${repoLabel}...`); + const stats = await buildLineStats(repoPath, repoUserConfig); + progressState.build.lineTotals = stats.totals; + progressState.build.linesByFile = stats.linesByFile; + progress.appendLog( + `[metrics] Line totals: code=${stats.totals.code.toLocaleString()} prose=${stats.totals.prose.toLocaleString()}` + ); + } catch (err) { + progress.appendLog(`[metrics] Line counts unavailable: ${err?.message || err}`); + } + } + + const lockCheck = await checkIndexLock({ + repoCacheRoot, + repoLabel, + lockMode, + lockWaitMs, + lockStaleMs, + onLog: progress.appendLog + }); + if (!lockCheck.ok) { + const detail = formatLockDetail(lockCheck.detail); + const message = `Skipping ${repoLabel}: index lock held ${detail}`.trim(); + progress.appendLog(`[lock] ${message}`); + if (!quietMode) console.error(message); + completed += 1; + progress.updateProgress(`Progress: ${completed}/${tasks.length} | skipped ${phaseLabel} | elapsed ${formatDuration(Date.now() - startTime)}`); + progress.updateMetrics('Metrics: skipped (lock)'); + results.push({ + ...task, + repoPath, + outFile, + summary: null, + skipped: true, + skipReason: 'lock', + lock: lockCheck.detail || null + }); + continue; + } + + const benchArgs = [ + benchScript, + '--repo', + repoPath, + '--queries', + task.queriesPath, + '--write-report', + '--out', + outFile + ]; + if (indexProfile) benchArgs.push('--index-profile', indexProfile); + benchArgs.push('--real-embeddings'); + if (argv.build) { + benchArgs.push('--build'); + } else { + if (argv['build-index'] || autoBuildIndex) benchArgs.push('--build-index'); + if (argv['build-sqlite'] || autoBuildSqlite) benchArgs.push('--build-sqlite'); + } + if (argv.incremental) benchArgs.push('--incremental'); + if (argv['stub-embeddings']) { + progress.appendLog('[bench] Stub embeddings requested; ignored for heavy language benchmarks.'); + } + if (argv.ann) benchArgs.push('--ann'); + if (argv['no-ann']) benchArgs.push('--no-ann'); + if (argv.backend) benchArgs.push('--backend', String(argv.backend)); + if (argv.top) benchArgs.push('--top', String(argv.top)); + if (argv.limit) benchArgs.push('--limit', String(argv.limit)); + if (argv['bm25-k1']) benchArgs.push('--bm25-k1', String(argv['bm25-k1'])); + if (argv['bm25-b']) benchArgs.push('--bm25-b', String(argv['bm25-b'])); + if (argv['fts-profile']) benchArgs.push('--fts-profile', String(argv['fts-profile'])); + if (argv['fts-weights']) benchArgs.push('--fts-weights', String(argv['fts-weights'])); + if (argv.threads) benchArgs.push('--threads', String(argv.threads)); + if (argv['no-index-profile']) benchArgs.push('--no-index-profile'); + + progress.updateProgress(`Progress: ${completed}/${tasks.length} | bench ${phaseLabel} | elapsed ${formatDuration(Date.now() - startTime)}`); + + let summary = null; + if (dryRun) { + progress.appendLog(`[dry-run] node ${benchArgs.join(' ')}`); + } else { + const benchResult = await processRunner.runProcess(`bench ${repoLabel}`, process.execPath, benchArgs, { + cwd: scriptRoot, + env: { + ...repoEnvBase, + PAIROFCLEATS_CACHE_ROOT: cacheRoot, + PAIROFCLEATS_PROGRESS_FILES: '1', + PAIROFCLEATS_PROGRESS_LINES: '1', + ...(Number.isFinite(Number(argv.threads)) && Number(argv.threads) > 0 + ? 
{ PAIROFCLEATS_THREADS: String(argv.threads) } + : {}) + }, + continueOnError: true + }); + if (!benchResult.ok) { + progress.appendLog(`[error] Bench failed for ${repoLabel}; continuing to next repo.`); + completed += 1; + progress.updateProgress(`Progress: ${completed}/${tasks.length} | failed ${phaseLabel} | elapsed ${formatDuration(Date.now() - startTime)}`); + progress.updateMetrics('Metrics: failed (bench)'); + results.push({ + ...task, + repoPath, + outFile, + summary: null, + failed: true, + failureReason: 'bench', + failureCode: benchResult.code ?? null + }); + continue; + } + try { + const raw = await fsPromises.readFile(outFile, 'utf8'); + summary = JSON.parse(raw).summary || null; + } catch (err) { + progress.appendLog(`[error] Failed to read bench report for ${repoLabel}; continuing.`); + if (err && err.message) console.error(err.message); + completed += 1; + progress.updateProgress(`Progress: ${completed}/${tasks.length} | failed ${phaseLabel} | elapsed ${formatDuration(Date.now() - startTime)}`); + progress.updateMetrics('Metrics: failed (report)'); + results.push({ + ...task, + repoPath, + outFile, + summary: null, + failed: true, + failureReason: 'report', + failureCode: null + }); + continue; + } + } + + completed += 1; + progress.updateProgress(`Progress: ${completed}/${tasks.length} | finished ${phaseLabel} | elapsed ${formatDuration(Date.now() - startTime)}`); + progress.updateMetrics(formatMetricSummary(summary)); + + results.push({ ...task, repoPath, outFile, summary }); +} + +const output = buildReportOutput({ + configPath, + cacheRoot, + resultsRoot, + results, + config +}); + +if (!quietMode) { + if (interactive) { + progress.renderStatus(); + process.stdout.write('\n'); + } + console.log('\nGrouped summary'); + for (const [language, payload] of Object.entries(output.groupedSummary)) { + if (!payload.summary) continue; + printSummary(payload.label, payload.summary, payload.count, quietMode); + } + printSummary('Overall', output.overallSummary, results.length, quietMode); +} + +if (argv.out) { + const outPath = path.resolve(argv.out); + await fsPromises.mkdir(path.dirname(outPath), { recursive: true }); + await fsPromises.writeFile(outPath, JSON.stringify(output, null, 2)); +} + +if (argv.json) { + console.log(JSON.stringify(output, null, 2)); +} else { + console.log(`\nCompleted ${results.length} benchmark runs.`); + if (argv.out) console.log(`Summary written to ${path.resolve(argv.out)}`); +} diff --git a/tools/bench-query-generator.js b/tools/bench-query-generator.js new file mode 100644 index 000000000..c25dbe329 --- /dev/null +++ b/tools/bench-query-generator.js @@ -0,0 +1,101 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import seedrandom from 'seedrandom'; +import { createCli } from '../src/shared/cli.js'; +import { loadChunkMeta } from '../src/shared/artifact-io.js'; +import { sha1 } from '../src/shared/hash.js'; +import { getIndexDir, loadUserConfig } from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'bench-query-generator', + options: { + repo: { type: 'string' }, + mode: { type: 'string', default: 'code' }, + count: { type: 'number', default: 50 }, + out: { type: 'string' }, + seed: { type: 'string' }, + json: { type: 'boolean', default: false }, + 'index-root': { type: 'string' } + } +}).parse(); + +const root = argv.repo ? 
path.resolve(argv.repo) : process.cwd(); +const userConfig = loadUserConfig(root); +const mode = String(argv.mode || 'code').toLowerCase(); +const indexRoot = argv['index-root'] ? path.resolve(argv['index-root']) : null; +const indexDir = getIndexDir(root, mode, userConfig, indexRoot ? { indexRoot } : {}); +const chunks = loadChunkMeta(indexDir); +if (!Array.isArray(chunks) || !chunks.length) { + console.error(`No chunk metadata found at ${indexDir}`); + process.exit(1); +} + +const count = Math.max(10, Math.min(200, Number(argv.count) || 50)); +const defaultSeed = sha1(`${indexDir}:${mode}:${chunks.length}`); +const seed = argv.seed || defaultSeed; +const rng = seedrandom(seed); + +const pick = (list) => list[Math.floor(rng() * list.length)]; +const uniq = (list) => Array.from(new Set(list.filter(Boolean))); +const tokensFromDoc = (text) => { + if (!text) return []; + return text.split(/\s+/).map((t) => t.replace(/[^\w-]/g, '')).filter((t) => t.length >= 4); +}; + +const names = uniq(chunks.map((c) => c.name)); +const signatures = uniq(chunks.map((c) => c.docmeta?.signature || c.metaV2?.signature)); +const kinds = uniq(chunks.map((c) => c.kind || c.metaV2?.kind)); +const returnTypes = uniq(chunks.map((c) => c.docmeta?.returnType || c.metaV2?.returns)); +const docs = uniq(chunks.flatMap((c) => tokensFromDoc(c.docmeta?.doc || c.metaV2?.doc))); +const riskTags = uniq(chunks.flatMap((c) => c.docmeta?.risk?.tags || c.metaV2?.risk?.tags || [])); + +const strategies = [ + () => (names.length ? `${pick(names)}` : null), + () => (signatures.length ? `${pick(signatures)} --signature` : null), + () => (names.length && kinds.length ? `${pick(names)} --kind ${pick(kinds)}` : null), + () => (returnTypes.length ? `widget --return-type ${pick(returnTypes)}` : null), + () => (docs.length ? `${pick(docs)}` : null), + () => (riskTags.length ? `exec --risk-tag ${pick(riskTags)}` : null) +]; + +const seen = new Set(); +const queries = []; +let attempts = 0; +while (queries.length < count && attempts < count * 20) { + attempts += 1; + const query = pick(strategies)(); + if (!query) continue; + if (seen.has(query)) continue; + seen.add(query); + queries.push(query); +} + +const payload = { + generatedAt: new Date().toISOString(), + seed, + indexDir, + mode, + count: queries.length, + queries +}; + +if (argv.json) { + const outPath = argv.out ? path.resolve(argv.out) : path.join(root, 'docs', 'benchmarks-queries.json'); + await fs.writeFile(outPath, JSON.stringify(payload, null, 2)); + console.log(`Wrote ${queries.length} queries to ${outPath}`); + process.exit(0); +} + +const outPath = argv.out + ? 
path.resolve(argv.out) + : path.join(root, 'benchmarks', 'queries', `generated-${mode}.txt`); +const lines = [ + '# Generated by bench-query-generator', + `# seed: ${seed}`, + `# mode: ${mode}`, + ...queries +]; +await fs.mkdir(path.dirname(outPath), { recursive: true }); +await fs.writeFile(outPath, lines.join('\n')); +console.log(`Wrote ${queries.length} queries to ${outPath}`); diff --git a/tools/bench-score-strategy.js b/tools/bench-score-strategy.js new file mode 100644 index 000000000..7a4c0ac02 --- /dev/null +++ b/tools/bench-score-strategy.js @@ -0,0 +1,233 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { createCli } from '../src/shared/cli.js'; +import { execaSync } from 'execa'; +import { getIndexDir, loadUserConfig, resolveToolRoot } from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'bench-score-strategy', + options: { + build: { type: 'boolean', default: false }, + 'build-index': { type: 'boolean', default: false }, + json: { type: 'boolean', default: false }, + 'stub-embeddings': { type: 'boolean', default: false }, + 'in-place': { type: 'boolean', default: false }, + repo: { type: 'string' }, + queries: { type: 'string' }, + out: { type: 'string' }, + backend: { type: 'string' }, + top: { type: 'number' }, + limit: { type: 'number' } + } +}).parse(); + +const toolRoot = resolveToolRoot(); +const root = process.cwd(); +const repoSource = path.resolve( + argv.repo || path.join(root, 'tests', 'fixtures', 'sample') +); +const useInPlace = argv['in-place'] === true; +const tempRoot = path.join(root, 'tests', '.cache', 'bench-score-strategy'); +const workRoot = useInPlace ? repoSource : path.join(tempRoot, 'repo'); +const cacheRoot = path.join(tempRoot, 'cache'); +const queriesPath = path.resolve( + argv.queries || path.join(repoSource, 'queries.txt') +); +const backend = argv.backend ? String(argv.backend) : 'memory'; +const topN = Number.isFinite(Number(argv.top)) ? Math.max(1, Number(argv.top)) : 5; +const limit = Number.isFinite(Number(argv.limit)) ? Math.max(0, Number(argv.limit)) : 0; +const buildRequested = argv.build === true || argv['build-index'] === true; +const useStubEmbeddings = argv['stub-embeddings'] === true; + +function runCommand(label, args, env) { + const result = execaSync(process.execPath, args, { encoding: 'utf8', env, reject: false }); + if (result.exitCode !== 0) { + console.error(`Failed: ${label}`); + if (result.stderr) console.error(result.stderr.trim()); + process.exit(result.exitCode ?? 
1); + } + return result.stdout || ''; +} + +async function loadQueries(filePath) { + try { + const raw = await fsPromises.readFile(filePath, 'utf8'); + return raw + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line && !line.startsWith('#')); + } catch { + return []; + } +} + +async function ensureWorkRoot() { + if (useInPlace) return; + await fsPromises.rm(tempRoot, { recursive: true, force: true }); + await fsPromises.mkdir(workRoot, { recursive: true }); + await fsPromises.cp(repoSource, workRoot, { recursive: true }); +} + +function hasIndexArtifacts(repoRoot, userConfig) { + const codeDir = getIndexDir(repoRoot, 'code', userConfig); + const proseDir = getIndexDir(repoRoot, 'prose', userConfig); + const codeMeta = path.join(codeDir, 'chunk_meta.json'); + const proseMeta = path.join(proseDir, 'chunk_meta.json'); + return fs.existsSync(codeMeta) && fs.existsSync(proseMeta); +} + +async function writeBlendConfig(repoRoot, baseConfig, enabled) { + const next = { ...(baseConfig || {}) }; + const search = { ...(next.search || {}) }; + const existingBlend = search.scoreBlend || {}; + search.scoreBlend = { + ...existingBlend, + enabled, + sparseWeight: Number.isFinite(Number(existingBlend.sparseWeight)) + ? Number(existingBlend.sparseWeight) + : 1, + annWeight: Number.isFinite(Number(existingBlend.annWeight)) + ? Number(existingBlend.annWeight) + : 1 + }; + next.search = search; + const configPath = path.join(repoRoot, '.pairofcleats.json'); + await fsPromises.writeFile(configPath, JSON.stringify(next, null, 2)); + return configPath; +} + +async function restoreConfig(repoRoot, originalConfig, configExisted) { + const configPath = path.join(repoRoot, '.pairofcleats.json'); + if (configExisted) { + await fsPromises.writeFile(configPath, originalConfig); + } else if (fs.existsSync(configPath)) { + await fsPromises.rm(configPath, { force: true }); + } +} + +await ensureWorkRoot(); +await fsPromises.rm(cacheRoot, { recursive: true, force: true }); +await fsPromises.mkdir(cacheRoot, { recursive: true }); +process.env.PAIROFCLEATS_CACHE_ROOT = cacheRoot; +if (useStubEmbeddings) process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + +const envBase = { + ...process.env, + PAIROFCLEATS_CACHE_ROOT: cacheRoot +}; +if (useStubEmbeddings) envBase.PAIROFCLEATS_EMBEDDINGS = 'stub'; + +const queries = await loadQueries(queriesPath); +if (!queries.length) { + console.error(`No queries found at ${queriesPath}`); + process.exit(1); +} +const selectedQueries = limit > 0 ? queries.slice(0, limit) : queries; + +const configPath = path.join(workRoot, '.pairofcleats.json'); +const configExisted = fs.existsSync(configPath); +const originalConfig = configExisted ? 
await fsPromises.readFile(configPath, 'utf8') : null; +const userConfig = loadUserConfig(workRoot); +const indexExists = hasIndexArtifacts(workRoot, userConfig); +if (!indexExists || buildRequested) { + const buildArgs = [path.join(toolRoot, 'build_index.js'), '--repo', workRoot]; + if (useStubEmbeddings) buildArgs.push('--stub-embeddings'); + runCommand('build index', buildArgs, envBase); +} + +const strategies = [ + { id: 'sparse', annFlag: '--no-ann', blend: false }, + { id: 'ann-fallback', annFlag: '--ann', blend: false }, + { id: 'blend', annFlag: '--ann', blend: true } +]; + +function mean(values) { + if (!values.length) return 0; + return values.reduce((a, b) => a + b, 0) / values.length; +} + +function runSearch(query, annFlag) { + const args = [ + path.join(toolRoot, 'search.js'), + query, + '--repo', + workRoot, + '--json', + '--json-compact', + '--stats', + '--backend', + backend, + '--top', + String(topN), + annFlag + ]; + const output = runCommand('search', args, envBase); + return JSON.parse(output || '{}'); +} + +const summaries = {}; +for (const strategy of strategies) { + await writeBlendConfig(workRoot, userConfig, strategy.blend); + const latencies = []; + const resultCounts = []; + const topScores = []; + const scoreTypeCounts = {}; + let hits = 0; + for (const query of selectedQueries) { + const payload = runSearch(query, strategy.annFlag); + const stats = payload.stats || {}; + if (Number.isFinite(stats.elapsedMs)) latencies.push(stats.elapsedMs); + const results = [ + ...(Array.isArray(payload.code) ? payload.code : []), + ...(Array.isArray(payload.prose) ? payload.prose : []) + ]; + resultCounts.push(results.length); + if (results.length) hits += 1; + if (results.length && Number.isFinite(results[0].score)) { + topScores.push(results[0].score); + } + for (const item of results) { + const type = item.scoreType || 'none'; + scoreTypeCounts[type] = (scoreTypeCounts[type] || 0) + 1; + } + } + summaries[strategy.id] = { + queries: selectedQueries.length, + hitRate: selectedQueries.length ? 
hits / selectedQueries.length : 0, + resultCountAvg: mean(resultCounts), + latencyMsAvg: mean(latencies), + topScoreAvg: mean(topScores), + scoreTypes: scoreTypeCounts + }; +} + +await restoreConfig(workRoot, originalConfig, configExisted); + +const output = { + generatedAt: new Date().toISOString(), + repo: { source: repoSource, root: workRoot }, + backend, + topN, + queries: selectedQueries.length, + strategies: summaries +}; + +if (argv.out) { + const outPath = path.resolve(argv.out); + await fsPromises.writeFile(outPath, JSON.stringify(output, null, 2)); +} + +if (argv.json) { + console.log(JSON.stringify(output, null, 2)); +} else { + console.log('Score strategy benchmark'); + console.log(`- Repo: ${workRoot}`); + console.log(`- Queries: ${selectedQueries.length}`); + for (const [name, stats] of Object.entries(summaries)) { + console.log(`- ${name} hit rate: ${(stats.hitRate * 100).toFixed(1)}%`); + console.log(`- ${name} avg results: ${stats.resultCountAvg.toFixed(1)}`); + console.log(`- ${name} avg latency: ${stats.latencyMsAvg.toFixed(1)} ms`); + } +} diff --git a/tools/bench/language/cli.js b/tools/bench/language/cli.js new file mode 100644 index 000000000..700e64e2b --- /dev/null +++ b/tools/bench/language/cli.js @@ -0,0 +1,155 @@ +import path from 'node:path'; +import { createCli } from '../../../src/shared/cli.js'; +import { BENCH_OPTIONS, mergeCliOptions, validateBenchArgs } from '../../../src/shared/cli-options.js'; +import { resolveToolRoot } from '../../dict-utils.js'; + +const parseMs = (value, fallback) => { + const parsed = Number(value); + if (Number.isFinite(parsed) && parsed >= 0) return Math.floor(parsed); + return fallback; +}; + +const normalizeLockMode = (value) => { + if (!value) return 'fail-fast'; + const raw = String(value).trim().toLowerCase(); + if (raw === 'wait' || raw === 'retry') return 'wait'; + if (raw === 'stale-clear' || raw === 'stale') return 'stale-clear'; + return 'fail-fast'; +}; + +const resolveBackendList = (value) => { + if (!value) return ['memory', 'sqlite']; + const trimmed = String(value).trim().toLowerCase(); + if (!trimmed) return ['memory', 'sqlite']; + if (trimmed === 'all') return ['memory', 'sqlite', 'sqlite-fts']; + return trimmed + .split(',') + .map((entry) => entry.trim()) + .filter(Boolean); +}; + +const isBenchProfile = (value) => { + if (!value) return false; + const normalized = String(value).trim().toLowerCase(); + if (!normalized) return false; + return normalized === 'bench' || normalized.startsWith('bench-'); +}; + +const buildRunSuffix = () => { + const now = new Date(); + const stamp = [ + now.getFullYear(), + String(now.getMonth() + 1).padStart(2, '0'), + String(now.getDate()).padStart(2, '0') + ].join(''); + const time = [ + String(now.getHours()).padStart(2, '0'), + String(now.getMinutes()).padStart(2, '0'), + String(now.getSeconds()).padStart(2, '0') + ].join(''); + return `run-${stamp}-${time}`; +}; + +export const parseBenchLanguageArgs = (rawArgs = process.argv.slice(2)) => { + const argv = createCli({ + scriptName: 'bench-language', + options: mergeCliOptions( + BENCH_OPTIONS, + { + list: { type: 'boolean', default: false }, + clone: { type: 'boolean', default: true }, + 'no-clone': { type: 'boolean', default: false }, + 'dry-run': { type: 'boolean', default: false }, + 'cache-run': { type: 'boolean', default: false }, + config: { type: 'string' }, + root: { type: 'string' }, + 'cache-root': { type: 'string' }, + 'cache-suffix': { type: 'string' }, + results: { type: 'string' }, + log: { type: 'string' }, + 
language: { type: 'string' }, + languages: { type: 'string' }, + tier: { type: 'string' }, + repos: { type: 'string' }, + only: { type: 'string' }, + 'log-lines': { type: 'number' }, + 'lock-mode': { type: 'string' }, + 'lock-wait-ms': { type: 'number' }, + 'lock-stale-ms': { type: 'number' } + } + ), + argv: ['node', 'bench-language-repos.js', ...(rawArgs || [])] + }).parse(); + validateBenchArgs(argv); + + const scriptRoot = resolveToolRoot(); + const configPath = path.resolve(argv.config || path.join(scriptRoot, 'benchmarks', 'repos.json')); + const reposRoot = path.resolve(argv.root || path.join(scriptRoot, 'benchmarks', 'repos')); + const cacheRootBase = path.resolve(argv['cache-root'] || path.join(scriptRoot, 'benchmarks', 'cache')); + const cacheSuffixRaw = typeof argv['cache-suffix'] === 'string' ? argv['cache-suffix'].trim() : ''; + const cacheRun = argv['cache-run'] === true; + const cacheSuffix = cacheSuffixRaw || (cacheRun ? buildRunSuffix() : ''); + const cacheRoot = cacheSuffix ? path.resolve(cacheRootBase, cacheSuffix) : cacheRootBase; + const resultsRoot = path.resolve(argv.results || path.join(scriptRoot, 'benchmarks', 'results')); + const logRoot = path.join(resultsRoot, 'logs', 'bench-language'); + const logPath = argv.log + ? path.resolve(argv.log) + : path.join(logRoot, `${buildRunSuffix()}.log`); + + const cloneEnabled = argv['no-clone'] ? false : argv.clone !== false; + const dryRun = argv['dry-run'] === true; + const quietMode = argv.json === true; + const interactive = !quietMode && process.stdout.isTTY; + const colorEnabled = interactive && !process.env.NO_COLOR; + + const logLineArg = Number.parseInt(argv['log-lines'], 10); + const logWindowSize = Number.isFinite(logLineArg) + ? Math.max(3, Math.min(50, logLineArg)) + : 20; + + const lockMode = normalizeLockMode( + argv['lock-mode'] + || ((argv.build || argv['build-index'] || argv['build-sqlite']) ? 'stale-clear' : '') + ); + const lockWaitMs = parseMs(argv['lock-wait-ms'], 5 * 60 * 1000); + const lockStaleMs = parseMs(argv['lock-stale-ms'], 30 * 60 * 1000); + + const backendList = resolveBackendList(argv.backend); + const wantsSqlite = backendList.includes('sqlite') + || backendList.includes('sqlite-fts') + || backendList.includes('fts'); + + const indexProfileRaw = typeof argv['index-profile'] === 'string' + ? argv['index-profile'].trim() + : ''; + const defaultHeavyProfile = 'full'; + const resolvedProfile = indexProfileRaw && !isBenchProfile(indexProfileRaw) + ? indexProfileRaw + : defaultHeavyProfile; + const indexProfile = argv['no-index-profile'] === true ? 
'' : resolvedProfile; + const suppressProfileEnv = argv['no-index-profile'] === true; + + return { + argv, + scriptRoot, + configPath, + reposRoot, + cacheRoot, + resultsRoot, + logRoot, + logPath, + cloneEnabled, + dryRun, + quietMode, + interactive, + colorEnabled, + logWindowSize, + lockMode, + lockWaitMs, + lockStaleMs, + backendList, + wantsSqlite, + indexProfile, + suppressProfileEnv + }; +}; diff --git a/tools/bench/language/config.js b/tools/bench/language/config.js new file mode 100644 index 000000000..b95853ed1 --- /dev/null +++ b/tools/bench/language/config.js @@ -0,0 +1,15 @@ +import { readJsoncFile } from '../../../src/shared/jsonc.js'; + +export const loadBenchConfig = (configPath) => { + try { + const config = readJsoncFile(configPath); + if (!config || typeof config !== 'object') { + throw new Error('Bench config must be a JSON object.'); + } + return config; + } catch (err) { + console.error(`Failed to read ${configPath}`); + if (err && err.message) console.error(err.message); + process.exit(1); + } +}; diff --git a/tools/bench/language/locks.js b/tools/bench/language/locks.js new file mode 100644 index 000000000..a81a34f9b --- /dev/null +++ b/tools/bench/language/locks.js @@ -0,0 +1,107 @@ +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { formatDuration } from './metrics.js'; + +const isProcessAlive = (pid) => { + if (!Number.isFinite(pid) || pid <= 0) return false; + try { + process.kill(pid, 0); + return true; + } catch (err) { + return err?.code === 'EPERM'; + } +}; + +const readLockInfo = async (lockPath) => { + try { + const raw = await fsPromises.readFile(lockPath, 'utf8'); + const parsed = JSON.parse(raw); + return parsed && typeof parsed === 'object' ? parsed : null; + } catch { + return null; + } +}; + +const getLockAgeMs = async (lockPath, info) => { + if (info?.startedAt) { + const started = Date.parse(info.startedAt); + if (Number.isFinite(started)) return Math.max(0, Date.now() - started); + } + try { + const stat = await fsPromises.stat(lockPath); + return Math.max(0, Date.now() - stat.mtimeMs); + } catch { + return null; + } +}; + +export const formatLockDetail = (detail) => { + if (!detail) return ''; + const parts = []; + if (Number.isFinite(detail.ageMs)) { + parts.push(`age ${formatDuration(detail.ageMs)}`); + } + if (Number.isFinite(detail.pid)) { + parts.push(`pid ${detail.pid}`); + } + return parts.length ? `(${parts.join(', ')})` : ''; +}; + +export const checkIndexLock = async ({ + repoCacheRoot, + repoLabel, + lockMode, + lockWaitMs, + lockStaleMs, + onLog +}) => { + const lockPath = path.join(repoCacheRoot, 'locks', 'index.lock'); + if (!fs.existsSync(lockPath)) return { ok: true }; + + const readDetail = async () => { + const info = await readLockInfo(lockPath); + const ageMs = await getLockAgeMs(lockPath, info); + const pid = Number.isFinite(Number(info?.pid)) ? Number(info.pid) : null; + const alive = pid ? 
isProcessAlive(pid) : null; + const detail = { lockPath, ageMs, pid, alive }; + const isStale = (Number.isFinite(ageMs) && ageMs > lockStaleMs) || (pid && !alive); + return { detail, isStale }; + }; + + const clearIfStale = async (detail) => { + try { + await fsPromises.rm(lockPath, { force: true }); + if (onLog) { + onLog(`[lock] cleared stale lock for ${repoLabel} ${formatLockDetail(detail)}`); + } + return true; + } catch (err) { + if (onLog) { + onLog(`[lock] failed to clear stale lock for ${repoLabel}: ${err?.message || err}`); + } + return false; + } + }; + + const initial = await readDetail(); + if (initial.isStale) { + const cleared = await clearIfStale(initial.detail); + if (cleared) return { ok: true, cleared: true, detail: initial.detail }; + } + + if (lockMode === 'wait') { + const deadline = Date.now() + lockWaitMs; + while (Date.now() < deadline) { + if (!fs.existsSync(lockPath)) return { ok: true }; + const current = await readDetail(); + if (current.isStale) { + const cleared = await clearIfStale(current.detail); + if (cleared) return { ok: true, cleared: true, detail: current.detail }; + } + await new Promise((resolve) => setTimeout(resolve, 1000)); + } + } + + return { ok: false, detail: initial.detail }; +}; diff --git a/tools/bench/language/metrics.js b/tools/bench/language/metrics.js new file mode 100644 index 000000000..36591d88a --- /dev/null +++ b/tools/bench/language/metrics.js @@ -0,0 +1,110 @@ +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { buildIgnoreMatcher } from '../../../src/index/build/ignore.js'; +import { discoverFilesForModes } from '../../../src/index/build/discover.js'; +import { readTextFile } from '../../../src/shared/encoding.js'; +import { countLinesForEntries } from '../../../src/shared/file-stats.js'; + +export const formatDuration = (ms) => { + const total = Math.max(0, Math.floor(ms / 1000)); + const hours = Math.floor(total / 3600); + const minutes = Math.floor((total % 3600) / 60); + const seconds = total % 60; + if (hours > 0) return `${hours}h ${minutes}m ${seconds}s`; + if (minutes > 0) return `${minutes}m ${seconds}s`; + return `${seconds}s`; +}; + +export const formatGb = (mb) => `${(mb / 1024).toFixed(1)} GB`; + +export const formatLoc = (value) => { + if (!Number.isFinite(value)) return 'n/a'; + if (value >= 1_000_000) return `${(value / 1_000_000).toFixed(2)}M`; + if (value >= 1_000) return `${(value / 1_000).toFixed(1)}k`; + return `${Math.floor(value)}`; +}; + +export const stripMaxOldSpaceFlag = (options) => { + if (!options) return ''; + return options + .replace(/--max-old-space-size=\d+/g, '') + .replace(/\s+/g, ' ') + .trim(); +}; + +export const getRecommendedHeapMb = () => { + const totalMb = Math.floor(os.totalmem() / (1024 * 1024)); + const recommended = Math.max(4096, Math.floor(totalMb * 0.75)); + const rounded = Math.floor(recommended / 256) * 256; + return { + totalMb, + recommendedMb: Math.max(4096, rounded) + }; +}; + +export const formatMetricSummary = (summary) => { + if (!summary) return 'Metrics: pending'; + const backends = summary.backends || Object.keys(summary.latencyMsAvg || {}); + const parts = []; + for (const backend of backends) { + const latency = summary.latencyMsAvg?.[backend]; + const hitRate = summary.hitRate?.[backend]; + const latencyText = Number.isFinite(latency) ? `${latency.toFixed(1)}ms` : 'n/a'; + const hitText = Number.isFinite(hitRate) ? 
`${(hitRate * 100).toFixed(1)}%` : 'n/a'; + parts.push(`${backend} ${latencyText} hit ${hitText}`); + } + if (summary.embeddingProvider) { + parts.push(`embed ${summary.embeddingProvider}`); + } + return parts.length ? `Metrics: ${parts.join(' | ')}` : 'Metrics: pending'; +}; + +const resolveMaxFileBytes = (userConfig) => { + const raw = userConfig?.indexing?.maxFileBytes; + const parsed = Number(raw); + if (raw === false || raw === 0) return null; + if (Number.isFinite(parsed) && parsed > 0) return Math.floor(parsed); + return 5 * 1024 * 1024; +}; + +export const buildLineStats = async (repoPath, userConfig) => { + const modes = ['code', 'prose']; + const { ignoreMatcher } = await buildIgnoreMatcher({ root: repoPath, userConfig }); + const skippedByMode = { code: [], prose: [] }; + const maxFileBytes = resolveMaxFileBytes(userConfig); + const entriesByMode = await discoverFilesForModes({ + root: repoPath, + modes, + ignoreMatcher, + skippedByMode, + maxFileBytes + }); + const linesByFile = { code: new Map(), prose: new Map() }; + const totals = { code: 0, prose: 0 }; + const concurrency = Math.max(1, Math.min(32, os.cpus().length * 2)); + for (const mode of modes) { + const entries = entriesByMode[mode] || []; + if (!entries.length) continue; + const lineCounts = await countLinesForEntries(entries, { concurrency }); + for (const [rel, lines] of lineCounts) { + linesByFile[mode].set(rel, lines); + totals[mode] += lines; + } + } + return { totals, linesByFile }; +}; + +export const validateEncodingFixtures = async (scriptRoot) => { + const fixturePath = path.join(scriptRoot, 'tests', 'fixtures', 'encoding', 'latin1.js'); + if (!fs.existsSync(fixturePath)) return; + try { + const { text, usedFallback } = await readTextFile(fixturePath); + const expected = 'caf\u00e9'; + if (!text.includes(expected) || !usedFallback) { + console.warn(`[bench] Encoding fixture did not decode as expected: ${fixturePath}`); + } + } catch (err) { + console.warn(`[bench] Encoding fixture read failed: ${err?.message || err}`); + } +}; diff --git a/tools/bench/language/process.js b/tools/bench/language/process.js new file mode 100644 index 000000000..d8f25ae78 --- /dev/null +++ b/tools/bench/language/process.js @@ -0,0 +1,117 @@ +import { execa, execaSync } from 'execa'; + +export const createProcessRunner = ({ + appendLog, + writeLog, + writeLogSync, + logHistory, + logPath +}) => { + let activeChild = null; + let activeLabel = ''; + let exitLogged = false; + + const setActiveChild = (child, label) => { + activeChild = child; + activeLabel = label; + }; + + const clearActiveChild = (child) => { + if (activeChild === child) { + activeChild = null; + activeLabel = ''; + } + }; + + const killProcessTree = (pid) => { + if (!Number.isFinite(pid)) return; + try { + if (process.platform === 'win32') { + execaSync('taskkill', ['/PID', String(pid), '/T', '/F'], { stdio: 'ignore', reject: false }); + return; + } + process.kill(pid, 'SIGTERM'); + } catch {} + }; + + const logExit = (reason, code) => { + if (exitLogged) return; + writeLogSync(`[exit] ${reason}${Number.isFinite(code) ? 
` code=${code}` : ''}`); + exitLogged = true; + }; + + const runProcess = async (label, cmd, args, options = {}) => { + const { continueOnError = false, ...spawnOptionsRest } = options; + const spawnOptions = { + ...spawnOptionsRest, + stdio: ['ignore', 'pipe', 'pipe'], + reject: false + }; + const child = execa(cmd, args, spawnOptions); + setActiveChild(child, label); + writeLog(`[start] ${label}`); + const carry = { stdout: '', stderr: '' }; + const handleChunk = (chunk, key) => { + const text = carry[key] + chunk.toString('utf8'); + const normalized = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n'); + const parts = normalized.split('\n'); + carry[key] = parts.pop() || ''; + for (const line of parts) appendLog(line); + }; + child.stdout?.on('data', (chunk) => handleChunk(chunk, 'stdout')); + child.stderr?.on('data', (chunk) => handleChunk(chunk, 'stderr')); + try { + const result = await child; + if (carry.stdout) appendLog(carry.stdout); + if (carry.stderr) appendLog(carry.stderr); + const code = result.exitCode; + writeLog(`[finish] ${label} code=${code}`); + clearActiveChild(child); + if (code === 0) { + return { ok: true }; + } + console.error(`Failed: ${label}`); + console.error(`Log: ${logPath}`); + writeLog(`[error] Failed: ${label}`); + writeLog(`[error] Log: ${logPath}`); + if (logHistory.length) { + console.error('Last log lines:'); + logHistory.slice(-10).forEach((line) => console.error(`- ${line}`)); + logHistory.slice(-10).forEach((line) => writeLog(`[error] ${line}`)); + } + if (logHistory.some((line) => line.toLowerCase().includes('filename too long'))) { + console.error('Hint: On Windows, enable long paths and set `git config --global core.longpaths true` or use a shorter --root path.'); + writeLog('[hint] Enable Windows long paths and set `git config --global core.longpaths true` or use a shorter --root path.'); + } + if (!continueOnError) { + logExit('failure', code ?? 1); + process.exit(code ?? 1); + } + return { ok: false, code: code ?? 1 }; + } catch (err) { + const message = err?.shortMessage || err?.message || err; + writeLog(`[error] ${label} spawn failed: ${message}`); + clearActiveChild(child); + console.error(`Failed: ${label}`); + console.error(`Log: ${logPath}`); + if (logHistory.length) { + console.error('Last log lines:'); + logHistory.slice(-10).forEach((line) => console.error(`- ${line}`)); + logHistory.slice(-10).forEach((line) => writeLog(`[error] ${line}`)); + } + if (!continueOnError) { + logExit('failure', err?.exitCode ?? 1); + process.exit(err?.exitCode ?? 1); + } + return { ok: false, code: err?.exitCode ?? 
1 }; + } + }; + + return { + runProcess, + killProcessTree, + logExit, + getActiveChild: () => activeChild, + getActiveLabel: () => activeLabel + }; +}; diff --git a/tools/bench/language/progress/parse.js b/tools/bench/language/progress/parse.js new file mode 100644 index 000000000..2e209ed1a --- /dev/null +++ b/tools/bench/language/progress/parse.js @@ -0,0 +1,87 @@ +const buildProgressRegex = /^\s*(Files|Imports)\s+(\d+)\/(\d+)\s+\((\d+(?:\.\d+)?)%\)/i; +const buildCombinedFileRegex = /^\s*Files\s+(\d+)\/(\d+)\s+\((\d+(?:\.\d+)?)%\)\s+(?:\[(.+?)\]\s+)?(?:File\s+)?(\d+)\/(\d+)(?:\s+lines\s+[0-9,\.]+)?\s+(.+)$/i; +const buildFileOnlyRegex = /^\s*(?:\[(.+?)\]\s+)?(?:File\s+)?(\d+)\/(\d+)(?:\s+lines\s+[0-9,\.]+)?\s+(.+)$/i; +const buildShardRegex = /^\s*(?:\u2192|->)\s+Shard\s+(\d+)\/(\d+):\s+([^\r\n\[]+?)(?:\s+\[[^\]]+\])?\s+\((\d+)\s+files\)/i; +const buildImportStatsRegex = /^\s*\u2192\s*Imports:\s+modules=(\d+),\s*edges=(\d+),\s*files=(\d+)/i; +const buildScanRegex = /Scanning\s+(code|prose)/i; +const buildLineRegex = /^\s*Line\s+(\d+)\s*\/\s*(\d+)/i; + +export const normalizeShardLabel = (raw) => { + if (!raw) return ''; + const trimmed = raw.trim(); + if (!trimmed || /^shard$/i.test(trimmed)) return ''; + return trimmed.replace(/^shard\s+/i, '').trim(); +}; + +export const parseShardLine = (line) => { + const match = buildShardRegex.exec(line); + if (!match) return null; + return { + index: Number.parseInt(match[1], 10), + total: Number.parseInt(match[2], 10), + shardLabel: match[3] ? match[3].trim() : '', + fileCount: Number.parseInt(match[4], 10) + }; +}; + +export const parseImportStatsLine = (line) => { + const match = buildImportStatsRegex.exec(line); + if (!match) return null; + return { + modules: Number.parseInt(match[1], 10), + edges: Number.parseInt(match[2], 10), + files: Number.parseInt(match[3], 10) + }; +}; + +export const parseFileProgressLine = (line) => { + const combined = buildCombinedFileRegex.exec(line); + if (combined) { + return { + count: Number.parseInt(combined[1], 10), + total: Number.parseInt(combined[2], 10), + pct: Number.parseFloat(combined[3]), + shardLabel: normalizeShardLabel(combined[4]), + fileIndex: Number.parseInt(combined[5], 10), + fileTotal: Number.parseInt(combined[6], 10), + file: combined[7] ? combined[7].trim() : '' + }; + } + const solo = buildFileOnlyRegex.exec(line); + if (!solo) return null; + return { + count: null, + total: null, + pct: null, + shardLabel: normalizeShardLabel(solo[1]), + fileIndex: Number.parseInt(solo[2], 10), + fileTotal: Number.parseInt(solo[3], 10), + file: solo[4] ? 
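+    // Trim the captured path; a missing capture yields an empty string.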
solo[4].trim() : '' + }; +}; + +export const parseProgressLine = (line) => { + const match = buildProgressRegex.exec(line); + if (!match) return null; + return { + step: match[1], + count: Number.parseInt(match[2], 10), + total: Number.parseInt(match[3], 10), + pct: Number.parseFloat(match[4]) + }; +}; + +export const parseLineProgress = (line) => { + const match = buildLineRegex.exec(line); + if (!match) return null; + return { + current: Number.parseInt(match[1], 10), + total: Number.parseInt(match[2], 10) + }; +}; + +export const parseScanMode = (line) => { + const match = buildScanRegex.exec(line); + if (!match) return null; + return match[1].toLowerCase(); +}; diff --git a/tools/bench/language/progress/render.js b/tools/bench/language/progress/render.js new file mode 100644 index 000000000..2e1b91141 --- /dev/null +++ b/tools/bench/language/progress/render.js @@ -0,0 +1,530 @@ +import readline from 'node:readline'; +import { formatShardFileProgress } from '../../../../src/shared/bench-progress.js'; +import { toPosix } from '../../../../src/shared/files.js'; +import { formatDuration, formatLoc } from '../metrics.js'; +import { + parseFileProgressLine, + parseImportStatsLine, + parseLineProgress, + parseProgressLine, + parseScanMode, + parseShardLine +} from './parse.js'; +import { resetBuildProgressState } from './state.js'; + +const ansi = { + reset: '\x1b[0m', + fgDim: '\x1b[90m', + fgLight: '\x1b[37m', + fgBright: '\x1b[97m', + bgBlack: '\x1b[40m' +}; + +export const createProgressRenderer = ({ + state, + interactive, + quietMode, + colorEnabled, + writeLog, + getActiveLabel +}) => { + const pushHistory = (line) => { + if (!line) return; + state.logHistory.push(line); + if (state.logHistory.length > state.logHistorySize) state.logHistory.shift(); + }; + + const truncateDisplay = (line) => { + if (!line) return ''; + const width = Number.isFinite(process.stdout.columns) ? process.stdout.columns : 120; + if (line.length <= width) return line; + return `${line.slice(0, Math.max(0, width - 1))}\u2026`; + }; + + const extractLogTag = (line) => { + if (!line) return ''; + const match = /^\s*\[([^\]]+)\]\s*/.exec(line); + return match ? match[1].trim().toLowerCase() : ''; + }; + + const resolveLogTag = (line, tagOverride) => { + if (tagOverride) return String(tagOverride).trim().toLowerCase(); + return extractLogTag(line); + }; + + const shouldUpdateLogWindowLine = (line, tag) => { + if (!tag) return true; + const now = Date.now(); + const last = state.logUpdateByTag.get(tag); + if (last) { + if (last.line === line) return false; + if (now - last.at < state.logUpdateDebounceMs) return false; + } + state.logUpdateByTag.set(tag, { line, at: now }); + return true; + }; + + const upsertLogWindowLine = (line, tagOverride) => { + const tag = resolveLogTag(line, tagOverride); + if (!tag) return false; + for (let i = state.logLines.length - 1; i >= 0; i -= 1) { + const existingTag = state.logLineTags[i] || extractLogTag(state.logLines[i]); + if (existingTag && existingTag === tag) { + state.logLines[i] = line; + state.logLineTags[i] = tag; + return true; + } + } + return false; + }; + + const pushLogWindowLine = (line, options = {}) => { + if (!interactive) return; + const tag = resolveLogTag(line, options.tag); + if (!shouldUpdateLogWindowLine(line, tag)) return; + const replaced = tag ? 
upsertLogWindowLine(line, tag) : false; + if (!replaced) { + state.logLines.push(line); + state.logLineTags.push(tag || ''); + if (state.logLines.length > state.logWindowSize) state.logLines.shift(); + if (state.logLineTags.length > state.logWindowSize) state.logLineTags.shift(); + } + renderStatus(); + }; + + const styleText = (text, prefix) => { + if (!colorEnabled || !text) return text; + return `${prefix}${text}${ansi.reset}`; + }; + + const formatBarLine = (line, width) => { + const content = line || ''; + const truncated = content.length > width + ? `${content.slice(0, Math.max(0, width - 1))}\u2026` + : content; + if (!colorEnabled) return truncated; + const padded = truncated.padEnd(width, ' '); + return `${ansi.bgBlack}${ansi.fgLight}${padded}${ansi.reset}`; + }; + + const formatLogLine = (line) => { + const content = line || ''; + if (!colorEnabled) return content; + if (/^\s*(?:\u2192|->)\s*Shard\s+/i.test(content)) { + return styleText(content, ansi.fgBright); + } + if (/^\s*\[shard\s+/i.test(content) + || /^\s*Files\s+\d+\/\d+/i.test(content) + || /^\s*File\s+\d+\/\d+/i.test(content)) { + return styleText(content, ansi.fgDim); + } + return content; + }; + + const renderStatus = () => { + if (!interactive) return; + if (!state.statusRendered) { + process.stdout.write('\n'.repeat(state.logWindowSize + 3)); + state.statusRendered = true; + } + readline.moveCursor(process.stdout, 0, -(state.logWindowSize + 3)); + const lines = [...state.logLines]; + const width = Number.isFinite(process.stdout.columns) ? process.stdout.columns : 120; + while (lines.length < state.logWindowSize) lines.push(''); + lines.push(state.metricsLine); + lines.push(state.fileProgressLine); + lines.push(state.progressLine); + for (let i = 0; i < lines.length; i += 1) { + const line = lines[i]; + const isBar = i >= state.logWindowSize; + readline.clearLine(process.stdout, 0); + const output = isBar + ? formatBarLine(line || '', width) + : formatLogLine(truncateDisplay(line || '')); + process.stdout.write(output); + process.stdout.write('\n'); + } + }; + + const parseDurationText = (text) => { + if (!text) return null; + const hours = /(\d+)\s*h/i.exec(text); + const minutes = /(\d+)\s*m/i.exec(text); + const seconds = /(\d+)\s*s/i.exec(text); + const totalSeconds = (hours ? Number(hours[1]) * 3600 : 0) + + (minutes ? Number(minutes[1]) * 60 : 0) + + (seconds ? Number(seconds[1]) : 0); + return Number.isFinite(totalSeconds) ? 
totalSeconds * 1000 : null; + }; + + const setProgressBase = (message) => { + state.progressLineBase = message || ''; + state.progressLinePrefix = ''; + state.progressLineSuffix = ''; + state.progressElapsedStartMs = null; + if (!message) return; + const match = message.match(/^(.*\| elapsed )([^|]+)(.*)$/); + if (!match) return; + const parsedMs = parseDurationText(match[2].trim()); + if (!Number.isFinite(parsedMs)) return; + state.progressLinePrefix = match[1]; + state.progressLineSuffix = match[3] || ''; + state.progressElapsedStartMs = Date.now() - parsedMs; + }; + + const getActiveShardList = (now = Date.now()) => { + const active = []; + for (const [index, lastSeen] of state.activeShards.entries()) { + if (now - lastSeen <= state.activeShardWindowMs) { + active.push(index); + } else { + state.activeShards.delete(index); + } + } + active.sort((a, b) => a - b); + return active; + }; + + const formatImportStats = (stats) => { + if (!stats) return ''; + const parts = []; + if (Number.isFinite(stats.modules)) parts.push(`${stats.modules} mods`); + if (Number.isFinite(stats.edges)) parts.push(`${stats.edges} edges`); + if (Number.isFinite(stats.files)) parts.push(`${stats.files} files`); + if (!parts.length) return ''; + return `imports ${parts.join(', ')}`; + }; + + const buildProgressLineExtras = (now = Date.now()) => { + const segments = []; + const shardList = getActiveShardList(now); + if (shardList.length) { + segments.push(`shards ${shardList.join(',')}`); + } + if (state.build.step?.toLowerCase() === 'imports') { + const importText = formatImportStats(state.build.importStats); + if (importText) segments.push(importText); + } + return segments.length ? ` | ${segments.join(' | ')}` : ''; + }; + + const buildProgressLineBase = (now = Date.now()) => { + if (state.progressLinePrefix && Number.isFinite(state.progressElapsedStartMs)) { + return `${state.progressLinePrefix}${formatDuration(now - state.progressElapsedStartMs)}${state.progressLineSuffix}`; + } + return state.progressLineBase; + }; + + const renderProgressLine = ({ now = Date.now(), log = false, force = false } = {}) => { + const baseLine = buildProgressLineBase(now); + const extra = buildProgressLineExtras(now); + let line = baseLine || ''; + if (extra) { + line = baseLine ? `${baseLine}${extra}` : extra.replace(/^\s*\|\s*/, ''); + } + if (!force && line === state.progressLine) return; + state.progressLine = line; + renderStatus(); + if (log && line && line !== state.lastProgressLogged) { + writeLog(`[progress] ${line}`); + state.lastProgressLogged = line; + } + if (log && !interactive && !quietMode && line !== state.lastProgressMessage) { + console.log(line); + state.lastProgressMessage = line; + } + }; + + const updateProgress = (message) => { + setProgressBase(message); + renderProgressLine({ log: true, force: true }); + }; + + const updateMetrics = (message) => { + state.metricsLine = message; + renderStatus(); + if (message && message !== state.lastMetricsLogged) { + writeLog(`[metrics] ${message}`); + state.lastMetricsLogged = message; + } + if (!interactive && !quietMode && message) { + console.log(message); + } + }; + + const updateFileProgressLine = () => { + const file = state.build.currentFile; + const current = state.build.currentLine; + const total = state.build.currentLineTotal; + if (!file) { + state.fileProgressLine = ''; + renderStatus(); + return; + } + const lineSegment = total > 0 ? 
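+    // Show a [current/total] counter only once the file's line total is known.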
` [${current}/${total}]` : ''; + const shardIndex = state.build.currentShardIndex; + const shardTotal = state.build.currentShardTotal; + const shardLabel = (Number.isFinite(shardIndex) && Number.isFinite(shardTotal)) + ? `${shardIndex}/${shardTotal}` + : ''; + const shardSegment = shardLabel ? `[shard ${shardLabel}] ` : '[shard] '; + state.fileProgressLine = `${shardSegment}${file}${lineSegment}`; + renderStatus(); + }; + + const refreshProgressLine = (now = Date.now(), force = false) => { + if (!interactive) return; + if (!force && now - state.lastProgressRefreshMs < state.progressRefreshMs) return; + state.lastProgressRefreshMs = now; + renderProgressLine({ now, force }); + }; + + const handleShardLine = (line) => { + const entry = parseShardLine(line); + if (!entry) return false; + if (entry.shardLabel && Number.isFinite(entry.index) && Number.isFinite(entry.total)) { + state.shardByLabel.set(entry.shardLabel, { index: entry.index, total: entry.total }); + } + return true; + }; + + const handleImportStatsLine = (line) => { + const stats = parseImportStatsLine(line); + if (!stats) return false; + state.build.importStats = stats; + return true; + }; + + const handleBuildMode = (line) => { + const mode = parseScanMode(line); + if (!mode) return; + if (mode === 'code' || mode === 'prose') { + state.build.mode = mode; + } + }; + + const resolveModeForFile = (rel) => { + if (!rel) return null; + if (state.build.linesByFile.code?.has(rel)) return 'code'; + if (state.build.linesByFile.prose?.has(rel)) return 'prose'; + return null; + }; + + const handleBuildFileLine = (lineOrEntry) => { + const entry = typeof lineOrEntry === 'string' ? parseFileProgressLine(lineOrEntry) : lineOrEntry; + if (!entry || !entry.file) return; + const rawPath = entry.file.trim(); + if (!rawPath) return; + const rel = toPosix(rawPath); + const inferredMode = resolveModeForFile(rel); + if (inferredMode && inferredMode !== state.build.mode) { + state.build.mode = inferredMode; + } + const mode = state.build.mode; + if (!mode || !state.build.linesByFile[mode]) return; + state.build.currentFile = rel; + state.build.currentLineTotal = state.build.linesByFile[mode].get(rel) || 0; + state.build.currentLine = 0; + const shardLabel = entry.shardLabel; + const shardInfo = shardLabel ? state.shardByLabel.get(shardLabel) : null; + state.build.currentShard = shardLabel || null; + state.build.currentShardIndex = shardInfo?.index ?? null; + state.build.currentShardTotal = shardInfo?.total ?? 
null; + if (Number.isFinite(state.build.currentShardIndex)) { + state.activeShards.set(state.build.currentShardIndex, Date.now()); + } + updateFileProgressLine(); + const seen = state.build.filesSeen[mode]; + if (seen.has(rel)) return; + const lineCount = state.build.linesByFile[mode].get(rel); + if (!Number.isFinite(lineCount)) return; + seen.add(rel); + state.build.linesProcessed[mode] += lineCount; + }; + + const handleBuildLineProgress = (line) => { + const progress = parseLineProgress(line); + if (!progress) return; + const { current, total } = progress; + if (!Number.isFinite(current) || !Number.isFinite(total) || total <= 0) return; + state.build.currentLine = current; + state.build.currentLineTotal = total; + updateFileProgressLine(); + }; + + const handleBuildProgress = (line) => { + const parsed = parseProgressLine(line); + if (!parsed) return false; + const { step, count, total, pct } = parsed; + if (!Number.isFinite(count) || !Number.isFinite(total) || !Number.isFinite(pct) || total <= 0) { + return true; + } + const label = state.currentRepoLabel || (getActiveLabel ? getActiveLabel() : '') || ''; + const now = Date.now(); + if ( + state.build.step !== step + || state.build.total !== total + || count < state.build.lastCount + || state.build.label !== label + ) { + state.build.step = step; + state.build.total = total; + state.build.startMs = now; + state.build.lastLoggedMs = 0; + state.build.lastCount = 0; + state.build.lastPct = 0; + state.build.label = label; + } + if (!state.build.startMs) state.build.startMs = now; + const elapsedMs = now - state.build.startMs; + const rate = elapsedMs > 0 ? count / (elapsedMs / 1000) : 0; + const remaining = total - count; + let etaMs = rate > 0 && remaining > 0 ? (remaining / rate) * 1000 : 0; + let lineRate = 0; + let remainingLines = 0; + let totalLines = 0; + if (step.toLowerCase() === 'files' && !state.build.mode) { + const fallbackMode = resolveModeForFile(state.build.currentFile); + if (fallbackMode) { + state.build.mode = fallbackMode; + } + } + if (step.toLowerCase() === 'files' && state.build.mode) { + const mode = state.build.mode; + totalLines = state.build.lineTotals[mode] || 0; + const processedLines = state.build.linesProcessed[mode] || 0; + if (elapsedMs > 0 && processedLines > 0) { + lineRate = processedLines / (elapsedMs / 1000); + } + remainingLines = totalLines - processedLines; + if (lineRate > 0 && remainingLines > 0) { + etaMs = (remainingLines / lineRate) * 1000; + } + } + const pctDelta = pct - state.build.lastPct; + const countDelta = count - state.build.lastCount; + const shouldLog = + count === total + || now - state.build.lastLoggedMs >= 5000 + || pctDelta >= 1 + || countDelta >= 500; + if (shouldLog) { + const rateText = rate > 0 ? `${rate.toFixed(1)}/s` : 'n/a'; + const lineRateText = lineRate > 0 ? `${Math.round(lineRate).toLocaleString()}/s` : null; + const etaText = etaMs > 0 ? formatDuration(etaMs) : 'n/a'; + const labelText = label ? ` ${label}` : ''; + const lineRateSegment = lineRateText ? ` | lines ${lineRateText}` : ''; + const totalLinesText = totalLines > 0 ? `${formatLoc(totalLines)}` : null; + const processedLinesText = totalLines > 0 + ? `${formatLoc(totalLines - remainingLines)}/${totalLinesText}` + : null; + const linesElapsedSegment = processedLinesText ? ` (${processedLinesText})` : ''; + const remainingLinesText = remainingLines > 0 ? formatLoc(remainingLines) : null; + const etaSegment = remainingLinesText ? 
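+      // Annotate the ETA with the remaining line count when line totals are tracked.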
`${etaText} (${remainingLinesText} rem)` : etaText; + const currentLineSegment = state.build.currentLineTotal > 0 + ? ` [${state.build.currentLine}/${state.build.currentLineTotal}]` + : ''; + const message = `Indexing${labelText} ${step} ${count}/${total} (${pct.toFixed(1)}%)${currentLineSegment} | rate ${rateText}${lineRateSegment} | elapsed ${formatDuration(elapsedMs)}${linesElapsedSegment} | eta ${etaSegment}`; + updateMetrics(message); + state.build.lastLoggedMs = now; + state.build.lastCount = count; + state.build.lastPct = pct; + } + refreshProgressLine(now); + return true; + }; + + const formatProgressLine = (line) => { + const parsed = parseProgressLine(line); + if (!parsed) return null; + const { step, count, total, pct } = parsed; + if (!Number.isFinite(count) || !Number.isFinite(total)) return null; + const pctText = Number.isFinite(pct) ? `${pct.toFixed(1)}%` : null; + const lineText = `${step} ${count}/${total}${pctText ? ` (${pctText})` : ''}`; + return { + line: lineText, + tag: `progress:${step.toLowerCase()}` + }; + }; + + const appendLog = (line) => { + const cleaned = line.replace(/\r/g, '').trimEnd(); + if (!cleaned) return; + if (handleImportStatsLine(cleaned)) { + refreshProgressLine(Date.now(), true); + } + if (handleShardLine(cleaned)) { + pushHistory(cleaned); + if (interactive) { + pushLogWindowLine(cleaned); + } else if (!quietMode) { + console.log(cleaned); + } + return; + } + if (parseLineProgress(cleaned)) { + handleBuildLineProgress(cleaned); + handleBuildProgress(cleaned); + return; + } + const fileProgress = parseFileProgressLine(cleaned); + if (fileProgress && fileProgress.file) { + pushHistory(cleaned); + handleBuildMode(cleaned); + handleBuildFileLine(fileProgress); + handleBuildLineProgress(cleaned); + handleBuildProgress(cleaned); + const formatted = formatShardFileProgress(fileProgress, { + shardByLabel: state.shardByLabel, + lineTotal: state.build.currentLineTotal + }); + if (formatted) { + if (interactive) { + pushLogWindowLine(formatted); + } else if (!quietMode) { + console.log(formatted); + } + } + return; + } + const formattedProgress = formatProgressLine(cleaned); + if (formattedProgress) { + const { line: formattedLine, tag } = formattedProgress; + pushHistory(cleaned); + handleBuildMode(cleaned); + handleBuildLineProgress(cleaned); + handleBuildProgress(cleaned); + if (interactive) { + pushLogWindowLine(formattedLine, { tag }); + } else if (!quietMode) { + console.log(formattedLine); + } + return; + } + pushHistory(cleaned); + writeLog(cleaned); + handleBuildMode(cleaned); + handleBuildFileLine(cleaned); + handleBuildLineProgress(cleaned); + handleBuildProgress(cleaned); + if (interactive) { + pushLogWindowLine(cleaned); + } else if (!quietMode) { + console.log(cleaned); + } + }; + + const resetBuildProgress = (label = '') => { + resetBuildProgressState(state, label); + updateFileProgressLine(); + }; + + return { + appendLog, + updateProgress, + updateMetrics, + updateFileProgressLine, + resetBuildProgress, + renderStatus + }; +}; diff --git a/tools/bench/language/progress/state.js b/tools/bench/language/progress/state.js new file mode 100644 index 000000000..b3a3efb22 --- /dev/null +++ b/tools/bench/language/progress/state.js @@ -0,0 +1,59 @@ +const createBuildProgressState = () => ({ + step: null, + total: 0, + startMs: 0, + lastLoggedMs: 0, + lastCount: 0, + lastPct: 0, + label: '', + mode: null, + lineTotals: { code: 0, prose: 0 }, + linesProcessed: { code: 0, prose: 0 }, + linesByFile: { code: new Map(), prose: new Map() }, + filesSeen: 
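+  // Files already counted toward linesProcessed, tracked per mode: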
{ code: new Set(), prose: new Set() }, + currentFile: null, + currentLine: 0, + currentLineTotal: 0, + currentShard: null, + currentShardIndex: null, + currentShardTotal: null, + importStats: null +}); + +export const createProgressState = ({ logWindowSize = 20, logHistorySize = 50 } = {}) => { + return { + logWindowSize, + logHistorySize, + logLines: Array(logWindowSize).fill(''), + logLineTags: Array(logWindowSize).fill(''), + logHistory: [], + logUpdateByTag: new Map(), + logUpdateDebounceMs: 250, + metricsLine: '', + progressLine: '', + fileProgressLine: '', + progressLineBase: '', + progressLinePrefix: '', + progressLineSuffix: '', + progressElapsedStartMs: null, + lastProgressRefreshMs: 0, + progressRefreshMs: 1000, + statusRendered: false, + lastProgressLogged: '', + lastProgressMessage: '', + lastMetricsLogged: '', + shardByLabel: new Map(), + activeShards: new Map(), + activeShardWindowMs: 5000, + build: createBuildProgressState(), + currentRepoLabel: '' + }; +}; + +export const resetBuildProgressState = (state, label = '') => { + state.build = createBuildProgressState(); + state.build.label = label; + state.shardByLabel.clear(); + state.activeShards.clear(); + state.logUpdateByTag.clear(); +}; diff --git a/tools/bench/language/report.js b/tools/bench/language/report.js new file mode 100644 index 000000000..69e361e04 --- /dev/null +++ b/tools/bench/language/report.js @@ -0,0 +1,100 @@ +export const summarizeResults = (items) => { + const valid = items.filter((entry) => entry.summary); + if (!valid.length) return null; + const backendSet = new Set(); + for (const entry of valid) { + const summary = entry.summary; + const backends = summary.backends || Object.keys(summary.latencyMsAvg || {}); + for (const backend of backends) backendSet.add(backend); + } + const backends = Array.from(backendSet); + const latencyMsAvg = {}; + const hitRate = {}; + const resultCountAvg = {}; + const memoryRssAvgMb = {}; + const buildMsAvg = {}; + for (const backend of backends) { + const latencies = valid.map((entry) => entry.summary?.latencyMsAvg?.[backend]).filter(Number.isFinite); + const hits = valid.map((entry) => entry.summary?.hitRate?.[backend]).filter(Number.isFinite); + const results = valid.map((entry) => entry.summary?.resultCountAvg?.[backend]).filter(Number.isFinite); + const mem = valid + .map((entry) => entry.summary?.memoryRss?.[backend]?.mean) + .filter(Number.isFinite) + .map((value) => value / (1024 * 1024)); + if (latencies.length) latencyMsAvg[backend] = latencies.reduce((a, b) => a + b, 0) / latencies.length; + if (hits.length) hitRate[backend] = hits.reduce((a, b) => a + b, 0) / hits.length; + if (results.length) resultCountAvg[backend] = results.reduce((a, b) => a + b, 0) / results.length; + if (mem.length) memoryRssAvgMb[backend] = mem.reduce((a, b) => a + b, 0) / mem.length; + } + for (const entry of valid) { + const build = entry.summary?.buildMs; + if (!build) continue; + for (const [key, value] of Object.entries(build)) { + if (!Number.isFinite(value)) continue; + if (!buildMsAvg[key]) buildMsAvg[key] = []; + buildMsAvg[key].push(value); + } + } + const buildMs = Object.fromEntries( + Object.entries(buildMsAvg).map(([key, values]) => [ + key, + values.reduce((a, b) => a + b, 0) / values.length + ]) + ); + return { + backends, + latencyMsAvg, + hitRate, + resultCountAvg, + memoryRssAvgMb, + buildMs: Object.keys(buildMs).length ? 
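+    // Null when no repo reported build timings.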
buildMs : null + }; +}; + +export const printSummary = (label, summary, count, quietMode) => { + if (!summary || quietMode) return; + console.log(`\n${label} summary (${count} repos)`); + for (const backend of summary.backends) { + const latency = summary.latencyMsAvg?.[backend]; + const hit = summary.hitRate?.[backend]; + const results = summary.resultCountAvg?.[backend]; + const mem = summary.memoryRssAvgMb?.[backend]; + const latencyText = Number.isFinite(latency) ? `${latency.toFixed(1)}ms` : 'n/a'; + const hitText = Number.isFinite(hit) ? `${(hit * 100).toFixed(1)}%` : 'n/a'; + const resultText = Number.isFinite(results) ? results.toFixed(1) : 'n/a'; + const memText = Number.isFinite(mem) ? `${mem.toFixed(1)} MB` : 'n/a'; + console.log(`- ${backend} avg ${latencyText} | hit ${hitText} | avg hits ${resultText} | rss ${memText}`); + } + if (summary.buildMs) { + for (const [key, value] of Object.entries(summary.buildMs)) { + if (!Number.isFinite(value)) continue; + console.log(`- build ${key} avg ${(value / 1000).toFixed(1)}s`); + } + } +}; + +export const buildReportOutput = ({ configPath, cacheRoot, resultsRoot, results, config }) => { + const groupedResults = new Map(); + for (const entry of results) { + if (!groupedResults.has(entry.language)) groupedResults.set(entry.language, []); + groupedResults.get(entry.language).push(entry); + } + const groupedSummary = {}; + for (const [language, items] of groupedResults.entries()) { + groupedSummary[language] = { + label: config[language]?.label || language, + count: items.length, + summary: summarizeResults(items) + }; + } + const overallSummary = summarizeResults(results); + return { + generatedAt: new Date().toISOString(), + config: configPath, + cacheRoot, + resultsRoot, + tasks: results, + groupedSummary, + overallSummary + }; +}; diff --git a/tools/bench/language/repos.js b/tools/bench/language/repos.js new file mode 100644 index 000000000..7e1c81c11 --- /dev/null +++ b/tools/bench/language/repos.js @@ -0,0 +1,114 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { execaSync } from 'execa'; +import { getIndexDir, getRepoCacheRoot, loadUserConfig, resolveSqlitePaths } from '../../dict-utils.js'; + +const canRun = (cmd, args) => { + try { + const result = execaSync(cmd, args, { encoding: 'utf8', reject: false }); + return result.exitCode === 0; + } catch { + return false; + } +}; + +export const resolveCloneTool = () => { + const gitAvailable = canRun('git', ['--version']); + const ghAvailable = canRun('gh', ['--version']); + const preferGit = process.platform === 'win32' && gitAvailable; + if (preferGit) { + return { + label: 'git', + buildArgs: (repo, repoPath) => [ + '-c', + 'core.longpaths=true', + '-c', + 'checkout.workers=0', + '-c', + 'checkout.thresholdForParallelism=0', + 'clone', + `https://github.com/${repo}.git`, + repoPath + ] + }; + } + if (ghAvailable) { + return { + label: 'gh', + buildArgs: (repo, repoPath) => ['repo', 'clone', repo, repoPath] + }; + } + if (gitAvailable) { + return { + label: 'git', + buildArgs: (repo, repoPath) => [ + '-c', + 'checkout.workers=0', + '-c', + 'checkout.thresholdForParallelism=0', + 'clone', + `https://github.com/${repo}.git`, + repoPath + ] + }; + } + console.error('GitHub CLI (gh) or git is required to clone benchmark repos.'); + process.exit(1); +}; + +export const ensureLongPathsSupport = () => { + if (process.platform !== 'win32') return; + if (canRun('git', ['--version'])) { + try { + execaSync('git', ['config', '--global', 'core.longpaths', 'true'], { stdio: 
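+        // Best-effort global config write; output is discarded and failures are ignored.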
'ignore', reject: false }); + } catch {} + } + let regResult; + try { + regResult = execaSync( + 'reg', + ['query', 'HKLM\\SYSTEM\\CurrentControlSet\\Control\\FileSystem', '/v', 'LongPathsEnabled'], + { encoding: 'utf8', reject: false } + ); + } catch { + regResult = null; + } + if (!regResult || regResult.exitCode !== 0) { + console.warn('Warning: Unable to confirm Windows long path setting. Enable LongPathsEnabled=1 if clones fail.'); + return; + } + const match = String(regResult.stdout || '').match(/LongPathsEnabled\s+REG_DWORD\s+0x([0-9a-f]+)/i); + if (!match) return; + const value = Number.parseInt(match[1], 16); + if (value === 0) { + console.warn('Warning: Windows long paths are disabled. Enable LongPathsEnabled=1 to avoid clone failures.'); + } +}; + +export const resolveRepoDir = ({ reposRoot, repo, language }) => { + const safeName = repo.replace('/', '__'); + return path.join(reposRoot, language, safeName); +}; + +export const resolveRepoCacheRoot = ({ repoPath, cacheRoot }) => { + return getRepoCacheRoot(repoPath, { cache: { root: cacheRoot } }); +}; + +export const needsIndexArtifacts = (repoRoot) => { + const userConfig = loadUserConfig(repoRoot); + const codeDir = getIndexDir(repoRoot, 'code', userConfig); + const proseDir = getIndexDir(repoRoot, 'prose', userConfig); + const hasChunkMeta = (dir) => ( + fs.existsSync(path.join(dir, 'chunk_meta.json')) + || fs.existsSync(path.join(dir, 'chunk_meta.jsonl')) + || fs.existsSync(path.join(dir, 'chunk_meta.meta.json')) + || fs.existsSync(path.join(dir, 'chunk_meta.parts')) + ); + return !hasChunkMeta(codeDir) || !hasChunkMeta(proseDir); +}; + +export const needsSqliteArtifacts = (repoRoot) => { + const userConfig = loadUserConfig(repoRoot); + const sqlitePaths = resolveSqlitePaths(repoRoot, userConfig); + return !fs.existsSync(sqlitePaths.codePath) || !fs.existsSync(sqlitePaths.prosePath); +}; diff --git a/tools/bench/micro/index-build.js b/tools/bench/micro/index-build.js new file mode 100644 index 000000000..488a2cb50 --- /dev/null +++ b/tools/bench/micro/index-build.js @@ -0,0 +1,50 @@ +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { buildIndex } from '../../../src/integrations/core/index.js'; +import { getRepoCacheRoot } from '../../dict-utils.js'; +import { hrtimeMs, summarizeDurations } from './utils.js'; + +export async function runIndexBuildBenchmark({ + repoRoot, + mode, + threads, + sqlite, + stubEmbeddings, + warmRuns, + cleanCache +}) { + const cacheRoot = getRepoCacheRoot(repoRoot); + if (cleanCache) { + await fs.rm(cacheRoot, { recursive: true, force: true }); + } + + const coldStart = process.hrtime.bigint(); + await buildIndex(repoRoot, { + mode, + threads, + incremental: false, + sqlite, + stubEmbeddings + }); + const coldMs = hrtimeMs(coldStart); + + const warmTimes = []; + for (let i = 0; i < warmRuns; i += 1) { + const start = process.hrtime.bigint(); + await buildIndex(repoRoot, { + mode, + threads, + incremental: true, + sqlite, + stubEmbeddings + }); + warmTimes.push(hrtimeMs(start)); + } + + return { + repoRoot, + cacheRoot: path.resolve(cacheRoot), + coldMs, + warm: summarizeDurations(warmTimes) + }; +} diff --git a/tools/bench/micro/run.js b/tools/bench/micro/run.js new file mode 100644 index 000000000..5f8807e1a --- /dev/null +++ b/tools/bench/micro/run.js @@ -0,0 +1,228 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import yargs from 'yargs/yargs'; +import { hideBin } from 'yargs/helpers'; +import { buildIndex } from '../../../src/integrations/core/index.js'; 
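+// Example invocations (illustrative, not part of this change; the flags match
+// the yargs options declared below, and the repo path is a placeholder):
+//   node tools/bench/micro/run.js --repo ../my-repo --components sparse --json
+//   node tools/bench/micro/run.js --components index-build,dense --runs 10 --out results.json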
+import { getIndexDir, resolveRepoRoot, resolveToolRoot } from '../../dict-utils.js'; +import { formatMs, formatStats } from './utils.js'; +import { runIndexBuildBenchmark } from './index-build.js'; +import { runSearchBenchmark } from './search.js'; + +const toolRoot = resolveToolRoot(); +const defaultRepo = path.resolve(toolRoot, 'tests', 'fixtures', 'sample'); + +const argv = yargs(hideBin(process.argv)) + .option('repo', { + type: 'string', + describe: 'Repo root to benchmark', + default: defaultRepo + }) + .option('mode', { + type: 'string', + describe: 'Index/search mode (code|prose)', + default: 'code' + }) + .option('query', { + type: 'string', + describe: 'Search query for microbench runs', + default: 'function' + }) + .option('backend', { + type: 'string', + describe: 'Search backend (memory|sqlite|sqlite-fts)', + default: 'memory' + }) + .option('runs', { + type: 'number', + describe: 'Warm run count per component', + default: 5 + }) + .option('warmup', { + type: 'number', + describe: 'Warmup runs discarded before measuring warm stats', + default: 1 + }) + .option('threads', { + type: 'number', + describe: 'Index build worker threads', + default: 0 + }) + .option('build', { + type: 'boolean', + describe: 'Build indexes before search benchmarks', + default: true + }) + .option('clean', { + type: 'boolean', + describe: 'Clean repo cache before cold index build', + default: true + }) + .option('sqlite', { + type: 'boolean', + describe: 'Enable SQLite builds during index benchmark', + default: false + }) + .option('stub-embeddings', { + type: 'boolean', + describe: 'Use stub embeddings for index build', + default: true + }) + .option('components', { + type: 'string', + describe: 'Comma-separated component list: index-build,sparse,dense,hybrid', + default: 'index-build,sparse,dense,hybrid' + }) + .option('json', { + type: 'boolean', + describe: 'Emit JSON output only', + default: false + }) + .option('out', { + type: 'string', + describe: 'Write JSON results to a file' + }) + .help() + .argv; + +const repoRoot = path.resolve(argv.repo || resolveRepoRoot(process.cwd())); +const warmRuns = Math.max(0, Math.floor(argv.runs)); +const warmupRuns = Math.max(0, Math.floor(argv.warmup)); +const threads = Number(argv.threads) > 0 ? Math.floor(argv.threads) : undefined; +const mode = argv.mode === 'prose' ? 'prose' : 'code'; +const components = parseComponents(argv.components); + +const results = { + repoRoot, + mode, + query: argv.query, + backend: argv.backend, + components: {} +}; + +const log = argv.json ? 
() => {} : console.log; + +await maybeBuildIndexes(); + +if (components.includes('index-build')) { + log('\n[index-build]'); + const bench = await runIndexBuildBenchmark({ + repoRoot, + mode, + threads, + sqlite: argv.sqlite === true, + stubEmbeddings: argv['stub-embeddings'] !== false, + warmRuns, + cleanCache: argv.clean === true + }); + results.components['index-build'] = bench; + if (!argv.json) { + log(`cold: ${formatMs(bench.coldMs)}`); + log(`warm: ${formatStats(bench.warm)}`); + } +} + +const indexCache = new Map(); +const sqliteCache = null; + +if (components.includes('sparse')) { + log('\n[search-sparse]'); + const bench = await runSearchBenchmark({ + repoRoot, + query: argv.query, + mode, + backend: argv.backend, + ann: false, + profile: null, + warmRuns, + warmupRuns, + indexCache, + sqliteCache + }); + results.components['search-sparse'] = bench; + if (!argv.json) { + log(`cold: ${formatMs(bench.coldMs)}`); + log(`warm: ${formatStats(bench.warm)}`); + } +} + +if (components.includes('dense')) { + log('\n[search-dense]'); + const bench = await runSearchBenchmark({ + repoRoot, + query: argv.query, + mode, + backend: argv.backend, + ann: true, + profile: null, + warmRuns, + warmupRuns, + indexCache, + sqliteCache + }); + results.components['search-dense'] = bench; + if (!argv.json) { + log(`cold: ${formatMs(bench.coldMs)}`); + log(`warm: ${formatStats(bench.warm)}`); + } +} + +if (components.includes('hybrid')) { + log('\n[search-hybrid]'); + const bench = await runSearchBenchmark({ + repoRoot, + query: argv.query, + mode, + backend: argv.backend, + ann: true, + profile: null, + warmRuns, + warmupRuns, + indexCache, + sqliteCache + }); + results.components['search-hybrid'] = bench; + if (!argv.json) { + log(`cold: ${formatMs(bench.coldMs)}`); + log(`warm: ${formatStats(bench.warm)}`); + } +} + +if (argv.out) { + const outPath = path.resolve(argv.out); + fs.writeFileSync(outPath, `${JSON.stringify(results, null, 2)}\n`); + log(`\nSaved results to ${outPath}`); +} + +if (argv.json) { + console.log(JSON.stringify(results, null, 2)); +} + +async function maybeBuildIndexes() { + if (!argv.build) return; + const indexDir = getIndexDir(repoRoot, mode); + const metaExists = hasChunkMeta(indexDir); + if (metaExists) return; + log('[setup] building indexes before search benchmarks'); + await buildIndex(repoRoot, { + mode, + threads, + incremental: true, + sqlite: argv.sqlite === true, + stubEmbeddings: argv['stub-embeddings'] !== false + }); +} + +function hasChunkMeta(indexDir) { + const json = path.join(indexDir, 'chunk_meta.json'); + const jsonl = path.join(indexDir, 'chunk_meta.jsonl'); + const meta = path.join(indexDir, 'chunk_meta.meta.json'); + return fs.existsSync(json) || fs.existsSync(jsonl) || fs.existsSync(meta); +} + +function parseComponents(value) { + if (!value) return []; + return value + .split(',') + .map((entry) => entry.trim().toLowerCase()) + .filter(Boolean); +} diff --git a/tools/bench/micro/search.js b/tools/bench/micro/search.js new file mode 100644 index 000000000..0478731f8 --- /dev/null +++ b/tools/bench/micro/search.js @@ -0,0 +1,66 @@ +import { search } from '../../../src/integrations/core/index.js'; +import { hrtimeMs, summarizeDurations } from './utils.js'; + +export async function runSearchBenchmark({ + repoRoot, + query, + mode, + backend, + ann, + profile, + warmRuns, + warmupRuns, + indexCache, + sqliteCache +}) { + const previousProfile = process.env.PAIROFCLEATS_PROFILE; + if (profile) { + process.env.PAIROFCLEATS_PROFILE = profile; + } else { + 
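+    // No profile requested: clear any inherited value so runs stay comparable.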
delete process.env.PAIROFCLEATS_PROFILE; + } + + const executeSearch = async () => { + const start = process.hrtime.bigint(); + await search(repoRoot, { + query, + mode, + backend, + ann, + json: true, + jsonCompact: true, + emitOutput: false, + indexCache, + sqliteCache + }); + return hrtimeMs(start); + }; + + try { + if (indexCache?.clear) indexCache.clear(); + if (sqliteCache?.clearAll) sqliteCache.clearAll(); + + const coldMs = await executeSearch(); + + for (let i = 0; i < warmupRuns; i += 1) { + await executeSearch(); + } + + const warmTimes = []; + for (let i = 0; i < warmRuns; i += 1) { + warmTimes.push(await executeSearch()); + } + + return { + repoRoot, + coldMs, + warm: summarizeDurations(warmTimes) + }; + } finally { + if (previousProfile !== undefined) { + process.env.PAIROFCLEATS_PROFILE = previousProfile; + } else { + delete process.env.PAIROFCLEATS_PROFILE; + } + } +} diff --git a/tools/bench/micro/tinybench.js b/tools/bench/micro/tinybench.js new file mode 100644 index 000000000..5044a7ecd --- /dev/null +++ b/tools/bench/micro/tinybench.js @@ -0,0 +1,342 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import yargs from 'yargs/yargs'; +import { hideBin } from 'yargs/helpers'; +import { Bench } from 'tinybench'; +import { build as buildHistogram } from 'hdr-histogram-js'; +import { buildIndex, search } from '../../../src/integrations/core/index.js'; +import { getIndexDir, resolveRepoRoot, resolveToolRoot } from '../../dict-utils.js'; + +const toolRoot = resolveToolRoot(); +const defaultRepo = path.resolve(toolRoot, 'tests', 'fixtures', 'sample'); + +const argv = yargs(hideBin(process.argv)) + .option('repo', { + type: 'string', + describe: 'Repo root to benchmark', + default: defaultRepo + }) + .option('mode', { + type: 'string', + describe: 'Index/search mode (code|prose)', + default: 'code' + }) + .option('backend', { + type: 'string', + describe: 'Search backend (memory|sqlite|sqlite-fts)', + default: 'memory' + }) + .option('query', { + type: 'string', + describe: 'Query used for search benchmarks', + default: 'function' + }) + .option('iterations', { + type: 'number', + describe: 'Iterations per task', + default: 64 + }) + .option('warmup-iterations', { + type: 'number', + describe: 'Warmup iterations per task', + default: 8 + }) + .option('time', { + type: 'number', + describe: 'Target runtime per task in ms', + default: 1000 + }) + .option('warmup-time', { + type: 'number', + describe: 'Warmup time per task in ms', + default: 250 + }) + .option('components', { + type: 'string', + describe: 'Comma-separated components (search-sparse,search-ann,search-dense,search-hybrid)', + default: 'search-sparse,search-ann' + }) + .option('build', { + type: 'boolean', + describe: 'Build indexes before running the bench', + default: true + }) + .option('stub-embeddings', { + type: 'boolean', + describe: 'Use stub embeddings when building indexes', + default: true + }) + .option('baseline', { + type: 'string', + describe: 'Baseline file for comparisons' + }) + .option('write-baseline', { + type: 'boolean', + describe: 'Write results to the baseline file', + default: false + }) + .option('compare', { + type: 'boolean', + describe: 'Compare results against the baseline file', + default: true + }) + .option('json', { + type: 'boolean', + describe: 'Emit JSON output only', + default: false + }) + .option('out', { + type: 'string', + describe: 'Write JSON results to a file' + }) + .help() + .argv; + +const repoRoot = 
path.resolve(argv.repo || resolveRepoRoot(process.cwd())); +const mode = argv.mode === 'prose' ? 'prose' : 'code'; +const backend = String(argv.backend || 'memory').toLowerCase(); +const components = parseComponents(argv.components); +const baselinePath = path.resolve( + argv.baseline || path.join(toolRoot, 'benchmarks', 'baselines', 'microbench.json') +); + +if (argv['stub-embeddings'] !== false) { + process.env.PAIROFCLEATS_EMBEDDINGS = 'stub'; +} else { + delete process.env.PAIROFCLEATS_EMBEDDINGS; +} + +await maybeBuildIndexes(); + +const bench = new Bench({ + name: 'pairofcleats-microbench', + iterations: Math.max(1, Math.floor(argv.iterations)), + warmupIterations: Math.max(0, Math.floor(argv['warmup-iterations'])), + time: Math.max(0, Math.floor(argv.time)), + warmupTime: Math.max(0, Math.floor(argv['warmup-time'])), + throws: true, + retainSamples: true +}); + +const indexCache = new Map(); +const sqliteCache = null; +const annConfig = { + sparse: false, + ann: true, + dense: true, + hybrid: true +}; + +for (const component of components) { + const normalized = component.toLowerCase(); + if (normalized === 'search-sparse') { + bench.add('search-sparse', () => runSearch(false)); + } else if (normalized === 'search-ann' || normalized === 'search-dense') { + bench.add(normalized, () => runSearch(annConfig.ann)); + } else if (normalized === 'search-hybrid') { + bench.add('search-hybrid', () => runSearch(annConfig.hybrid)); + } +} + +if (!bench.tasks.length) { + console.error('[tinybench] No tasks defined. Check --components.'); + process.exit(1); +} + +await bench.run(); + +const results = { + generatedAt: new Date().toISOString(), + repoRoot, + mode, + backend, + bench: { + iterations: bench.iterations, + warmupIterations: bench.warmupIterations, + timeMs: bench.time, + warmupTimeMs: bench.warmupTime + }, + env: buildEnvSnapshot(), + components: summarizeBenchTasks(bench.tasks) +}; + +const comparison = argv.compare ? 
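+// Comparison is on by default; pass --no-compare to skip baseline deltas.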
compareBaseline(results, baselinePath) : null; +if (comparison) { + results.baseline = comparison; +} + +if (argv['write-baseline']) { + ensureDir(path.dirname(baselinePath)); + fs.writeFileSync(baselinePath, `${JSON.stringify(results, null, 2)}\n`); +} + +if (argv.out) { + const outPath = path.resolve(argv.out); + ensureDir(path.dirname(outPath)); + fs.writeFileSync(outPath, `${JSON.stringify(results, null, 2)}\n`); +} + +if (argv.json) { + console.log(JSON.stringify(results, null, 2)); +} else { + printSummary(results, comparison); +} + +async function runSearch(ann) { + await search(repoRoot, { + query: argv.query, + mode, + backend, + ann, + json: true, + jsonCompact: true, + emitOutput: false, + indexCache, + sqliteCache + }); +} + +async function maybeBuildIndexes() { + if (!argv.build) return; + const indexDir = getIndexDir(repoRoot, mode); + const metaExists = hasChunkMeta(indexDir); + if (metaExists) return; + await buildIndex(repoRoot, { + mode, + incremental: true, + sqlite: backend !== 'memory', + stubEmbeddings: argv['stub-embeddings'] !== false + }); +} + +function hasChunkMeta(indexDir) { + const json = path.join(indexDir, 'chunk_meta.json'); + const jsonl = path.join(indexDir, 'chunk_meta.jsonl'); + const meta = path.join(indexDir, 'chunk_meta.meta.json'); + return fs.existsSync(json) || fs.existsSync(jsonl) || fs.existsSync(meta); +} + +function parseComponents(value) { + if (!value) return []; + return value + .split(',') + .map((entry) => entry.trim()) + .filter(Boolean); +} + +function buildEnvSnapshot() { + const cpu = os.cpus(); + return { + node: process.version, + platform: process.platform, + arch: process.arch, + cpuModel: cpu[0]?.model || 'unknown', + cpuCount: cpu.length + }; +} + +function summarizeBenchTasks(tasks) { + const entries = {}; + for (const task of tasks) { + entries[task.name] = summarizeTask(task); + } + return entries; +} + +function summarizeTask(task) { + const latency = task.result?.latency || {}; + const samples = Array.isArray(latency.samples) ? 
latency.samples : []; + const percentiles = summarizeSamples(samples); + return { + samples: latency.samplesCount || samples.length || 0, + meanMs: latency.mean || 0, + minMs: latency.min || 0, + maxMs: latency.max || 0, + p50Ms: percentiles.p50, + p95Ms: percentiles.p95, + p99Ms: percentiles.p99, + totalTimeMs: task.result?.totalTime || 0 + }; +} + +function summarizeSamples(samples) { + if (!samples.length) return { p50: 0, p95: 0, p99: 0 }; + const scaled = samples.map((value) => Math.max(1, Math.round(value * 1000))); + const maxValue = Math.max(...scaled, 1); + const histogram = buildHistogram({ + lowestDiscernibleValue: 1, + highestTrackableValue: maxValue, + numberOfSignificantValueDigits: 3 + }); + scaled.forEach((value) => histogram.recordValue(value)); + return { + p50: histogram.getValueAtPercentile(50) / 1000, + p95: histogram.getValueAtPercentile(95) / 1000, + p99: histogram.getValueAtPercentile(99) / 1000 + }; +} + +function compareBaseline(current, baselineFile) { + if (!fs.existsSync(baselineFile)) return null; + let baseline = null; + try { + baseline = JSON.parse(fs.readFileSync(baselineFile, 'utf8')); + } catch { + return null; + } + if (!baseline?.components) return null; + const deltas = {}; + for (const [name, stats] of Object.entries(current.components || {})) { + const base = baseline.components?.[name]; + if (!base) continue; + deltas[name] = { + meanPct: deltaPct(stats.meanMs, base.meanMs), + p50Pct: deltaPct(stats.p50Ms, base.p50Ms), + p95Pct: deltaPct(stats.p95Ms, base.p95Ms), + p99Pct: deltaPct(stats.p99Ms, base.p99Ms) + }; + } + return { + path: baselineFile, + deltas + }; +} + +function deltaPct(current, baseline) { + if (!Number.isFinite(current) || !Number.isFinite(baseline) || baseline === 0) return null; + return ((current - baseline) / baseline) * 100; +} + +function formatMs(value) { + if (!Number.isFinite(value)) return 'n/a'; + return `${value.toFixed(1)}ms`; +} + +function formatDelta(value) { + if (!Number.isFinite(value)) return 'n/a'; + const sign = value >= 0 ? 
'+' : ''; + return `${sign}${value.toFixed(1)}%`; +} + +function printSummary(results, comparison) { + console.log('[tinybench] Results'); + for (const [name, stats] of Object.entries(results.components || {})) { + console.log(`- ${name}: mean ${formatMs(stats.meanMs)} | p50 ${formatMs(stats.p50Ms)} | p95 ${formatMs(stats.p95Ms)} | p99 ${formatMs(stats.p99Ms)} | n=${stats.samples}`); + if (comparison?.deltas?.[name]) { + const delta = comparison.deltas[name]; + console.log(` delta: mean ${formatDelta(delta.meanPct)} | p50 ${formatDelta(delta.p50Pct)} | p95 ${formatDelta(delta.p95Pct)} | p99 ${formatDelta(delta.p99Pct)}`); + } + } + if (argv['write-baseline']) { + console.log(`- baseline saved: ${baselinePath}`); + } else if (comparison?.path) { + console.log(`- baseline: ${comparison.path}`); + } +} + +function ensureDir(dir) { + if (!dir) return; + fs.mkdirSync(dir, { recursive: true }); +} diff --git a/tools/bench/micro/utils.js b/tools/bench/micro/utils.js new file mode 100644 index 000000000..f0d6fb6f0 --- /dev/null +++ b/tools/bench/micro/utils.js @@ -0,0 +1,47 @@ +import { build as buildHistogram } from 'hdr-histogram-js'; + +const buildLatencyHistogram = (values) => { + if (!values.length) return null; + const scaled = values.map((value) => Math.max(1, Math.round(value * 1000))); + const maxValue = Math.max(...scaled, 1); + const histogram = buildHistogram({ + lowestDiscernibleValue: 1, + highestTrackableValue: maxValue, + numberOfSignificantValueDigits: 3 + }); + scaled.forEach((value) => histogram.recordValue(value)); + return histogram; +}; + +export function summarizeDurations(values) { + if (!values.length) { + return { count: 0, mean: 0, min: 0, max: 0, p50: 0, p95: 0, p99: 0 }; + } + const total = values.reduce((sum, value) => sum + value, 0); + const min = Math.min(...values); + const max = Math.max(...values); + const histogram = buildLatencyHistogram(values); + const pct = (p) => (histogram ? 
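+  // Percentiles come from the HDR histogram, rescaled from 1/1000 ms units back to ms.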
histogram.getValueAtPercentile(p) / 1000 : 0); + return { + count: values.length, + mean: total / values.length, + min, + max, + p50: pct(50), + p95: pct(95), + p99: pct(99) + }; +} + +export function formatMs(value) { + if (!Number.isFinite(value)) return 'n/a'; + return `${value.toFixed(1)}ms`; +} + +export function formatStats(stats) { + return `mean ${formatMs(stats.mean)} | p50 ${formatMs(stats.p50)} | p95 ${formatMs(stats.p95)} | p99 ${formatMs(stats.p99)} | min ${formatMs(stats.min)} | max ${formatMs(stats.max)} | n=${stats.count}`; +} + +export function hrtimeMs(start) { + return Number(process.hrtime.bigint() - start) / 1e6; +} diff --git a/tools/bootstrap.js b/tools/bootstrap.js index 32155368b..c870d13c7 100644 --- a/tools/bootstrap.js +++ b/tools/bootstrap.js @@ -1,34 +1,35 @@ #!/usr/bin/env node import fs from 'node:fs'; import path from 'node:path'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; import { runCommand, runCommandOrExit } from './cli-utils.js'; -import { getDictionaryPaths, getDictConfig, getRepoCacheRoot, getToolingConfig, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; +import { getDictionaryPaths, getDictConfig, getRepoCacheRoot, getRuntimeConfig, getToolingConfig, loadUserConfig, resolveRepoRoot, resolveRuntimeEnv, resolveToolRoot } from './dict-utils.js'; import { getVectorExtensionConfig, resolveVectorExtensionPath } from './vector-extension.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['skip-install', 'skip-dicts', 'skip-index', 'with-sqlite', 'incremental', 'skip-artifacts', 'skip-tooling', 'validate-config'], - string: ['repo'], - alias: { s: 'with-sqlite', i: 'incremental' }, - default: { - 'skip-install': false, - 'skip-dicts': false, - 'skip-index': false, - 'with-sqlite': false, - 'incremental': false, - 'skip-artifacts': false, - 'skip-tooling': false, - 'validate-config': false - } -}); +const argv = createCli({ + scriptName: 'bootstrap', + options: { + 'skip-install': { type: 'boolean', default: false }, + 'skip-dicts': { type: 'boolean', default: false }, + 'skip-index': { type: 'boolean', default: false }, + 'with-sqlite': { type: 'boolean', default: false }, + incremental: { type: 'boolean', default: false }, + 'skip-artifacts': { type: 'boolean', default: false }, + 'skip-tooling': { type: 'boolean', default: false }, + 'validate-config': { type: 'boolean', default: false }, + repo: { type: 'string' } + }, + aliases: { s: 'with-sqlite', i: 'incremental' } +}).parse(); const rootArg = argv.repo ? 
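+// An explicit --repo overrides cwd-based repo detection.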
path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); +const toolRoot = resolveToolRoot(); const configPath = path.join(root, '.pairofcleats.json'); if (argv['validate-config'] && fs.existsSync(configPath)) { const result = runCommand( process.execPath, - [path.join('tools', 'validate-config.js'), '--config', configPath], + [path.join(toolRoot, 'tools', 'validate-config.js'), '--config', configPath], { cwd: root, stdio: 'inherit' } ); if (!result.ok) { @@ -37,6 +38,8 @@ if (argv['validate-config'] && fs.existsSync(configPath)) { } const userConfig = loadUserConfig(root); +const runtimeConfig = getRuntimeConfig(root, userConfig); +const baseEnv = resolveRuntimeEnv(runtimeConfig, process.env); const vectorExtension = getVectorExtensionConfig(root, userConfig); const repoCacheRoot = getRepoCacheRoot(root, userConfig); const incrementalCacheRoot = path.join(repoCacheRoot, 'incremental'); @@ -54,7 +57,7 @@ let restoredArtifacts = false; * @param {string} label */ function run(cmd, args, label) { - runCommandOrExit(label || cmd, cmd, args, { cwd: root, stdio: 'inherit' }); + runCommandOrExit(label || cmd, cmd, args, { cwd: root, stdio: 'inherit', env: baseEnv }); } if (!argv['skip-install']) { @@ -68,7 +71,7 @@ if (!argv['skip-dicts']) { const dictConfig = getDictConfig(root, userConfig); const englishPath = path.join(dictConfig.dir, 'en.txt'); if (!fs.existsSync(englishPath)) { - run(process.execPath, [path.join('tools', 'download-dicts.js'), '--lang', 'en'], 'download English dictionary'); + run(process.execPath, [path.join(toolRoot, 'tools', 'download-dicts.js'), '--lang', 'en'], 'download English dictionary'); } const dictionaryPaths = await getDictionaryPaths(root, dictConfig); if (dictionaryPaths.length) { @@ -91,8 +94,8 @@ if (!argv['skip-tooling']) { const toolingConfig = getToolingConfig(root, userConfig); const detectResult = runCommand( process.execPath, - [path.join('tools', 'tooling-detect.js'), '--root', root, '--json'], - { cwd: root, encoding: 'utf8', stdio: 'pipe' } + [path.join(toolRoot, 'tools', 'tooling-detect.js'), '--root', root, '--json'], + { cwd: root, encoding: 'utf8', stdio: 'pipe', env: baseEnv } ); if (detectResult.status === 0 && detectResult.stdout) { try { @@ -101,7 +104,7 @@ if (!argv['skip-tooling']) { ? 
report.tools.filter((tool) => tool && tool.found === false) : []; if (toolingConfig.autoInstallOnDetect && missingTools.length) { - const installArgs = [path.join('tools', 'tooling-install.js'), '--root', root, '--scope', toolingConfig.installScope]; + const installArgs = [path.join(toolRoot, 'tools', 'tooling-install.js'), '--root', root, '--scope', toolingConfig.installScope]; if (!toolingConfig.allowGlobalFallback) installArgs.push('--no-fallback'); run(process.execPath, installArgs, 'install tooling'); } else if (missingTools.length) { @@ -118,22 +121,23 @@ if (!argv['skip-tooling']) { if (!argv['skip-artifacts'] && fs.existsSync(path.join(artifactsDir, 'manifest.json'))) { const result = runCommand( process.execPath, - [path.join('tools', 'ci-restore-artifacts.js'), '--from', artifactsDir], - { cwd: root, stdio: 'inherit' } + [path.join(toolRoot, 'tools', 'ci-restore-artifacts.js'), '--from', artifactsDir], + { cwd: root, stdio: 'inherit', env: baseEnv } ); restoredArtifacts = result.ok; } if (!argv['skip-index'] && !restoredArtifacts) { - const indexArgs = ['build_index.js']; + const indexArgs = [path.join(toolRoot, 'build_index.js')]; if (useIncremental) indexArgs.push('--incremental'); run(process.execPath, indexArgs, 'build index'); } if (argv['with-sqlite']) { - const sqliteArgs = [path.join('tools', 'build-sqlite-index.js')]; + const sqliteArgs = [path.join(toolRoot, 'tools', 'build-sqlite-index.js')]; if (useIncremental) sqliteArgs.push('--incremental'); run(process.execPath, sqliteArgs, 'build sqlite index'); } +console.log('[bootstrap] Tip: run npm run index-validate to verify index artifacts.'); console.log('\nBootstrap complete.'); diff --git a/tools/build-embeddings.js b/tools/build-embeddings.js new file mode 100644 index 000000000..99dfa8ad0 --- /dev/null +++ b/tools/build-embeddings.js @@ -0,0 +1,12 @@ +#!/usr/bin/env node +import { fileURLToPath } from 'node:url'; +import { runBuildEmbeddings } from './build-embeddings/run.js'; + +export { runBuildEmbeddings }; + +if (process.argv[1] === fileURLToPath(import.meta.url)) { + runBuildEmbeddings().catch((err) => { + console.error(err?.message || err); + process.exit(1); + }); +} diff --git a/tools/build-embeddings/atomic.js b/tools/build-embeddings/atomic.js new file mode 100644 index 000000000..254e293a2 --- /dev/null +++ b/tools/build-embeddings/atomic.js @@ -0,0 +1,89 @@ +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import path from 'node:path'; + +export const createTempPath = (filePath) => { + const suffix = `.tmp-${process.pid}-${Date.now()}-${Math.random().toString(16).slice(2, 8)}`; + const tempPath = `${filePath}${suffix}`; + if (process.platform !== 'win32' || tempPath.length <= 240) { + return tempPath; + } + const dir = path.dirname(filePath); + const ext = path.extname(filePath) || '.bin'; + const shortName = `.tmp-${Math.random().toString(16).slice(2, 10)}${ext}`; + return path.join(dir, shortName); +}; + +export const replaceFile = async (tempPath, finalPath) => { + const bakPath = `${finalPath}.bak`; + const finalExists = fsSync.existsSync(finalPath); + let backupAvailable = fsSync.existsSync(bakPath); + if (finalExists && !backupAvailable) { + try { + await fs.rename(finalPath, bakPath); + backupAvailable = true; + } catch (err) { + if (err?.code !== 'ENOENT') { + backupAvailable = fsSync.existsSync(bakPath); + } + } + } + try { + await fs.rename(tempPath, finalPath); + } catch (err) { + if (err?.code !== 'EEXIST' && err?.code !== 'EPERM' && err?.code !== 'ENOTEMPTY') { + throw err; + 
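+      // Windows-style rename collisions (EEXIST/EPERM/ENOTEMPTY) fall through
+      // to the remove-and-retry path below.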
} + if (!backupAvailable) { + throw err; + } + try { + await fs.rm(finalPath, { force: true }); + } catch {} + await fs.rename(tempPath, finalPath); + } +}; + +/** + * Replace a file atomically without creating a .bak. This is intended for + * ephemeral cache entries where retaining backups would create excessive churn. + */ +export const replaceFileNoBak = async (tempPath, finalPath) => { + const copyFallback = async () => { + try { + await fs.copyFile(tempPath, finalPath); + await fs.rm(tempPath, { force: true }); + return true; + } catch { + return false; + } + }; + + try { + await fs.rename(tempPath, finalPath); + return; + } catch (err) { + if (err?.code === 'EXDEV') { + if (await copyFallback()) return; + throw err; + } + if (err?.code !== 'EEXIST' + && err?.code !== 'EPERM' + && err?.code !== 'ENOTEMPTY' + && err?.code !== 'EACCES') { + throw err; + } + } + + try { + await fs.rm(finalPath, { force: true }); + } catch {} + try { + await fs.rename(tempPath, finalPath); + } catch (err) { + if (err?.code === 'EXDEV') { + if (await copyFallback()) return; + } + throw err; + } +}; diff --git a/tools/build-embeddings/cache.js b/tools/build-embeddings/cache.js new file mode 100644 index 000000000..d53909fcf --- /dev/null +++ b/tools/build-embeddings/cache.js @@ -0,0 +1,79 @@ +import path from 'node:path'; +import { sha1 } from '../../src/shared/hash.js'; + +// Keep in sync with src/index/embedding.js defaults. +const DEFAULT_POOLING = 'mean'; +const DEFAULT_NORMALIZE = true; +const DEFAULT_TRUNCATION = true; +const DEFAULT_QUANT_MIN = -1; +const DEFAULT_QUANT_MAX = 1; +const DEFAULT_QUANT_LEVELS = 256; + +export const buildCacheIdentity = ({ + modelId, + provider, + mode, + stub, + dims, + scale, + onnx, + preprocess, + quantization +} = {}) => { + const providerValue = provider || null; + const resolvedPreprocess = preprocess && typeof preprocess === 'object' ? preprocess : {}; + const resolvedQuant = quantization && typeof quantization === 'object' ? quantization : {}; + const resolvedOnnx = onnx && typeof onnx === 'object' ? onnx : null; + + const identity = { + // Bump to invalidate caches when embedding semantics change. + version: 2, + modelId: modelId || null, + provider: providerValue, + mode: mode || null, + stub: stub === true, + dims: dims ?? null, + scale, + preprocess: { + pooling: resolvedPreprocess.pooling ?? DEFAULT_POOLING, + normalize: resolvedPreprocess.normalize ?? DEFAULT_NORMALIZE, + truncation: resolvedPreprocess.truncation ?? DEFAULT_TRUNCATION, + // Reserved for future use (explicit max_length / tokenizer policy). + maxLength: resolvedPreprocess.maxLength ?? null + }, + quantization: { + // Allows future changes (e.g., asymmetric / per-channel / float16) to invalidate caches. + version: resolvedQuant.version ?? 1, + minVal: resolvedQuant.minVal ?? DEFAULT_QUANT_MIN, + maxVal: resolvedQuant.maxVal ?? DEFAULT_QUANT_MAX, + levels: resolvedQuant.levels ?? DEFAULT_QUANT_LEVELS + }, + onnx: providerValue === 'onnx' && resolvedOnnx ? { + modelPath: resolvedOnnx.modelPath ?? null, + tokenizerId: resolvedOnnx.tokenizerId ?? null, + executionProviders: resolvedOnnx.executionProviders ?? null, + intraOpNumThreads: resolvedOnnx.intraOpNumThreads ?? null, + interOpNumThreads: resolvedOnnx.interOpNumThreads ?? null, + graphOptimizationLevel: resolvedOnnx.graphOptimizationLevel ?? 
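+      // Hedged note: every ONNX knob above feeds the sha1 identity key
+      // computed below, so flipping threads, execution providers, or the
+      // graph level forces a re-embed instead of reusing cached vectors.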
null + } : null + }; + const key = sha1(JSON.stringify(identity)); + return { identity, key }; +}; + +export const resolveCacheRoot = ({ repoCacheRoot, cacheDirConfig }) => { + if (cacheDirConfig) return path.resolve(cacheDirConfig); + return path.join(repoCacheRoot, 'embeddings'); +}; + +export const resolveCacheDir = (cacheRoot, mode) => path.join(cacheRoot, mode, 'files'); + +export const buildCacheKey = ({ file, hash, signature, identityKey }) => { + if (!hash) return null; + return sha1(`${file}:${hash}:${signature}:${identityKey}`); +}; + +export const isCacheValid = ({ cached, signature, identityKey }) => { + if (!cached || cached.chunkSignature !== signature) return false; + return cached.cacheMeta?.identityKey === identityKey; +}; diff --git a/tools/build-embeddings/chunks.js b/tools/build-embeddings/chunks.js new file mode 100644 index 000000000..eb41aa079 --- /dev/null +++ b/tools/build-embeddings/chunks.js @@ -0,0 +1,76 @@ +import fsSync from 'node:fs'; +import path from 'node:path'; +import { + normalizeBundleFormat, + readBundleFile, + resolveBundleFilename, + resolveBundleFormatFromName +} from '../../src/shared/bundle-io.js'; +import { sha1 } from '../../src/shared/hash.js'; + +export const buildChunkSignature = (items) => sha1( + items.map(({ chunk }) => `${chunk.start}:${chunk.end}`).join('|') +); + +export const buildChunksFromBundles = async (bundleDir, manifestFiles, bundleFormat) => { + const resolvedBundleFormat = normalizeBundleFormat(bundleFormat); + const chunksByFile = new Map(); + let maxChunkId = -1; + let total = 0; + // Ensure deterministic chunk ordering regardless of JSON object insertion order. + const manifestEntries = Object.entries(manifestFiles || {}).sort(([a], [b]) => ( + a < b ? -1 : (a > b ? 1 : 0) + )); + for (const [relPath, entry] of manifestEntries) { + const bundleName = entry?.bundle || resolveBundleFilename(relPath, resolvedBundleFormat); + const bundlePath = path.join(bundleDir, bundleName); + if (!fsSync.existsSync(bundlePath)) continue; + let bundle; + try { + const result = await readBundleFile(bundlePath, { + format: resolveBundleFormatFromName(bundleName, resolvedBundleFormat) + }); + if (!result.ok) continue; + bundle = result.bundle; + } catch { + continue; + } + const filePath = bundle?.file || relPath; + const chunks = Array.isArray(bundle?.chunks) ? bundle.chunks : []; + if (!chunks.length) continue; + const list = chunksByFile.get(filePath) || []; + for (const chunk of chunks) { + if (!chunk) continue; + const id = Number.isFinite(chunk.id) ? chunk.id : null; + if (Number.isFinite(id) && id > maxChunkId) maxChunkId = id; + list.push({ index: Number.isFinite(id) ? id : null, chunk }); + total += 1; + } + chunksByFile.set(filePath, list); + } + if (!chunksByFile.size) { + return { chunksByFile, totalChunks: 0 }; + } + let totalChunks = maxChunkId >= 0 ? 
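+  // Bundle-assigned chunk ids win when present; otherwise fall back to the
+  // raw count and assign sequential ids below.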
maxChunkId + 1 : total; + if (maxChunkId < 0) { + let next = 0; + for (const list of chunksByFile.values()) { + for (const item of list) { + item.index = next; + next += 1; + } + } + totalChunks = next; + } else { + let next = maxChunkId + 1; + for (const list of chunksByFile.values()) { + for (const item of list) { + if (Number.isFinite(item.index)) continue; + item.index = next; + next += 1; + } + } + totalChunks = Math.max(totalChunks, next); + } + return { chunksByFile, totalChunks }; +}; diff --git a/tools/build-embeddings/cli.js b/tools/build-embeddings/cli.js new file mode 100644 index 000000000..5e7d29bb2 --- /dev/null +++ b/tools/build-embeddings/cli.js @@ -0,0 +1,91 @@ +import os from 'node:os'; +import path from 'node:path'; +import { createCli } from '../../src/shared/cli.js'; +import { getEnvConfig } from '../../src/shared/env.js'; +import { normalizeEmbeddingProvider, normalizeOnnxConfig } from '../../src/shared/onnx-embeddings.js'; +import { normalizeHnswConfig } from '../../src/shared/hnsw.js'; +import { getModelConfig, loadUserConfig, resolveIndexRoot, resolveRepoRoot } from '../dict-utils.js'; + +export const parseBuildEmbeddingsArgs = (rawArgs = process.argv.slice(2)) => { + const argv = createCli({ + scriptName: 'build-embeddings', + argv: ['node', 'build-embeddings.js', ...(rawArgs || [])], + options: { + mode: { type: 'string', default: 'all' }, + repo: { type: 'string' }, + dims: { type: 'number' }, + batch: { type: 'number' }, + 'stub-embeddings': { type: 'boolean', default: false }, + 'index-root': { type: 'string' } + } + }).parse(); + + const root = argv.repo ? path.resolve(argv.repo) : resolveRepoRoot(process.cwd()); + const userConfig = loadUserConfig(root); + const envConfig = getEnvConfig(); + const indexingConfig = userConfig.indexing || {}; + const embeddingsConfig = indexingConfig.embeddings || {}; + const embeddingProvider = normalizeEmbeddingProvider(embeddingsConfig.provider); + const embeddingOnnx = normalizeOnnxConfig(embeddingsConfig.onnx || {}); + const hnswConfig = normalizeHnswConfig(embeddingsConfig.hnsw || {}); + + const embeddingModeRaw = typeof embeddingsConfig.mode === 'string' + ? embeddingsConfig.mode.trim().toLowerCase() + : 'auto'; + const baseStubEmbeddings = argv['stub-embeddings'] === true + || envConfig.embeddings === 'stub'; + const normalizedEmbeddingMode = ['auto', 'inline', 'service', 'stub', 'off'].includes(embeddingModeRaw) + ? embeddingModeRaw + : 'auto'; + const resolvedEmbeddingMode = normalizedEmbeddingMode === 'auto' + ? (baseStubEmbeddings ? 'stub' : 'inline') + : (normalizedEmbeddingMode === 'service' + ? (baseStubEmbeddings ? 'stub' : 'inline') + : normalizedEmbeddingMode); + + const embeddingBatchRaw = Number(argv.batch ?? indexingConfig.embeddingBatchSize); + let embeddingBatchSize = Number.isFinite(embeddingBatchRaw) + ? Math.max(0, Math.floor(embeddingBatchRaw)) + : 0; + if (!embeddingBatchSize) { + const totalGb = os.totalmem() / (1024 ** 3); + const autoBatch = Math.floor(totalGb * 32); + embeddingBatchSize = Math.min(128, Math.max(32, autoBatch)); + } + + const useStubEmbeddings = resolvedEmbeddingMode === 'stub' || baseStubEmbeddings; + const configuredDims = Number.isFinite(Number(argv.dims)) + ? Math.max(1, Math.floor(Number(argv.dims))) + : null; + + const modelConfig = getModelConfig(root, userConfig); + const indexRoot = argv['index-root'] + ? 
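+    // An explicit --index-root always wins over the config-derived location.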
path.resolve(argv['index-root'])
+    : resolveIndexRoot(root, userConfig);
+
+  const embedModeRaw = (argv.mode || 'all').toLowerCase();
+  const embedMode = embedModeRaw === 'both' ? 'all' : embedModeRaw;
+  const modes = embedMode === 'all' ? ['code', 'prose'] : [embedMode];
+
+  return {
+    argv,
+    root,
+    userConfig,
+    envConfig,
+    indexingConfig,
+    embeddingsConfig,
+    embeddingProvider,
+    embeddingOnnx,
+    hnswConfig,
+    normalizedEmbeddingMode,
+    resolvedEmbeddingMode,
+    useStubEmbeddings,
+    embeddingBatchSize,
+    configuredDims,
+    modelConfig,
+    modelId: modelConfig.id,
+    modelsDir: modelConfig.dir || null,
+    indexRoot,
+    modes
+  };
+};
diff --git a/tools/build-embeddings/embed.js b/tools/build-embeddings/embed.js
new file mode 100644
index 000000000..5949a3097
--- /dev/null
+++ b/tools/build-embeddings/embed.js
@@ -0,0 +1,90 @@
+import { normalizeVec, quantizeVec } from '../../src/index/embedding.js';
+
+export const runBatched = async ({ texts, batchSize, embed }) => {
+  if (!texts.length) return [];
+  if (!batchSize || texts.length <= batchSize) {
+    return embed(texts);
+  }
+  const out = [];
+  for (let i = 0; i < texts.length; i += batchSize) {
+    const slice = texts.slice(i, i + batchSize);
+    const batch = await embed(slice);
+    out.push(...batch);
+  }
+  return out;
+};
+
+export const ensureVectorArrays = (vectors, count) => {
+  if (Array.isArray(vectors) && vectors.length === count) return vectors;
+  const out = [];
+  for (let i = 0; i < count; i += 1) {
+    out.push(Array.isArray(vectors?.[i]) ? vectors[i] : []);
+  }
+  return out;
+};
+
+export const createDimsValidator = ({ mode, configuredDims }) => {
+  let dims = 0;
+  const assertDims = (length) => {
+    if (!length) return;
+    if (configuredDims && configuredDims !== length) {
+      throw new Error(
+        `[embeddings] ${mode} embedding dims mismatch (configured=${configuredDims}, observed=${length}).`
+      );
+    }
+    if (dims && dims !== length) {
+      throw new Error(
+        `[embeddings] ${mode} embedding dims mismatch (expected=${dims} from earlier chunks, observed=${length}).`
+      );
+    }
+    if (!dims) dims = length;
+  };
+  const getDims = () => dims;
+  return { assertDims, getDims };
+};
+
+export const isDimsMismatch = (err) =>
+  err?.message?.includes('embedding dims mismatch');
+
+export const validateCachedDims = ({ vectors, expectedDims, mode }) => {
+  if (!expectedDims || !Array.isArray(vectors)) return;
+  for (const vec of vectors) {
+    if (!Array.isArray(vec) || !vec.length) continue;
+    if (vec.length !== expectedDims) {
+      throw new Error(
+        `[embeddings] ${mode} embedding dims mismatch (configured=${expectedDims}, observed=${vec.length}).`
+      );
+    }
+  }
+};
+
+export const buildQuantizedVectors = ({
+  chunkIndex,
+  codeVector,
+  docVector,
+  zeroVector,
+  addHnswVector
+}) => {
+  const embedCode = Array.isArray(codeVector) ? codeVector : [];
+  const embedDoc = Array.isArray(docVector) ? docVector : zeroVector;
+  const merged = embedCode.length
+    ? embedCode.map((value, idx) => (value + (embedDoc[idx] ?? 0)) / 2)
+    : embedDoc;
+  const normalized = normalizeVec(merged);
+  if (addHnswVector && normalized.length) {
+    addHnswVector(chunkIndex, normalized);
+  }
+  const quantizedCode = embedCode.length ? quantizeVec(embedCode) : [];
+  const quantizedDoc = embedDoc.length ? quantizeVec(embedDoc) : [];
+  const quantizedMerged = normalized.length ? quantizeVec(normalized) : [];
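+  // Sketch of the merge math above (assuming the quantization defaults
+  // mirrored in cache.js still hold): merged is the element-wise mean of the
+  // code and doc embeddings, re-normalized so cosine ordering survives the
+  // uint8 round trip, then quantizeVec maps [-1, 1] onto 256 levels, e.g.
+  //   mean([0.2, -0.4], [0.6, 0.0]) -> [0.4, -0.2] -> normalize -> uint8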
+  return { quantizedCode, quantizedDoc, quantizedMerged };
+};
+
+export const fillMissingVectors = (vectorList, dims) => {
+  // Missing slots intentionally alias one shared zero-filled array; downstream
+  // code only serializes these vectors, so the aliasing is safe.
+  const fallback = new Array(dims).fill(0);
+  for (let i = 0; i < vectorList.length; i += 1) {
+    if (!Array.isArray(vectorList[i]) || vectorList[i].length !== dims) {
+      vectorList[i] = fallback;
+    }
+  }
+};
diff --git a/tools/build-embeddings/hnsw.js b/tools/build-embeddings/hnsw.js
new file mode 100644
index 000000000..a27331b8b
--- /dev/null
+++ b/tools/build-embeddings/hnsw.js
@@ -0,0 +1,78 @@
+import fs from 'node:fs/promises';
+import hnswlib from 'hnswlib-node';
+import { writeJsonObjectFile } from '../../src/shared/json-stream.js';
+import { createTempPath, replaceFile } from './atomic.js';
+
+const { HierarchicalNSW } = hnswlib?.default || hnswlib || {};
+
+export const createHnswBuilder = ({ enabled, config, totalChunks, mode }) => {
+  let index = null;
+  let added = 0;
+  let expected = 0;
+
+  const initHnsw = (vector) => {
+    if (!enabled || index || !Array.isArray(vector) || !vector.length) return;
+    if (!HierarchicalNSW) return;
+    index = new HierarchicalNSW(config.space, vector.length);
+    index.initIndex({
+      maxElements: totalChunks,
+      m: config.m,
+      efConstruction: config.efConstruction,
+      randomSeed: config.randomSeed,
+      allowReplaceDeleted: config.allowReplaceDeleted
+    });
+  };
+
+  const addVector = (chunkIndex, vector) => {
+    if (!enabled || !vector || !vector.length) return;
+    const data = Array.isArray(vector) ? vector : Array.from(vector);
+    initHnsw(data);
+    if (!index) return;
+    expected += 1;
+    try {
+      index.addPoint(data, chunkIndex);
+      added += 1;
+    } catch {
+      // Swallow the insert error here; writeIndex reconciles added vs expected
+      // and fails the build on any mismatch.
+    }
+  };
+
+  const writeIndex = async ({ indexPath, metaPath, modelId, dims }) => {
+    if (!enabled || !index || !expected) return { skipped: true };
+    if (expected !== added) {
+      throw new Error(`HNSW insert count mismatch (${added} of ${expected}).`);
+    }
+    const tempHnswPath = createTempPath(indexPath);
+    try {
+      index.writeIndexSync(tempHnswPath);
+      await replaceFile(tempHnswPath, indexPath);
+    } catch (err) {
+      try {
+        await fs.rm(tempHnswPath, { force: true });
+      } catch {}
+      throw err;
+    }
+    const hnswMeta = {
+      version: 1,
+      generatedAt: new Date().toISOString(),
+      model: modelId || null,
+      dims,
+      count: added,
+      expectedCount: expected,
+      space: config.space,
+      m: config.m,
+      efConstruction: config.efConstruction,
+      efSearch: config.efSearch
+    };
+    await writeJsonObjectFile(metaPath, { fields: hnswMeta, atomic: true });
+    return { skipped: false, count: added };
+  };
+
+  const getStats = () => ({ added, expected, ready: !!index });
+
+  return {
+    addVector,
+    writeIndex,
+    getStats
+  };
+};
diff --git a/tools/build-embeddings/lancedb.js b/tools/build-embeddings/lancedb.js
new file mode 100644
index 000000000..72dc367cc
--- /dev/null
+++ b/tools/build-embeddings/lancedb.js
@@ -0,0 +1,142 @@
+import fs from 'node:fs/promises';
+import fsSync from 'node:fs';
+import { tryImport } from '../../src/shared/optional-deps.js';
+import { writeJsonObjectFile } from '../../src/shared/json-stream.js';
+import { dequantizeUint8ToFloat32 } from '../../src/storage/sqlite/vector.js';
+import { normalizeLanceDbConfig, resolveLanceDbPaths } from '../../src/shared/lancedb.js';
+
+let warnedMissing = false;
+
+const loadLanceDb = async () => {
+  const result = await tryImport('@lancedb/lancedb');
+  if (!result.ok) {
+    if (!warnedMissing) {
+      warnedMissing = true;
+      console.warn('[embeddings] LanceDB unavailable; skipping LanceDB build.');
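+      // Warn only once per process; later callers just receive null.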
+ } + return null; + } + return result.mod?.default || result.mod; +}; + +const createTable = async (db, tableName, rows) => { + if (!db || typeof db.createTable !== 'function') return null; + return db.createTable(tableName, rows, { mode: 'overwrite' }); +}; + +const addRows = async (table, rows) => { + if (!table) return; + if (typeof table.add === 'function') { + await table.add(rows); + return; + } + if (typeof table.insert === 'function') { + await table.insert(rows); + return; + } + if (typeof table.append === 'function') { + await table.append(rows); + } +}; + +const buildBatch = (vectors, start, end, idColumn, embeddingColumn) => { + const rows = []; + for (let i = start; i < end; i += 1) { + const vec = vectors[i]; + if (!vec || typeof vec.length !== 'number') continue; + const floatVec = dequantizeUint8ToFloat32(vec); + if (!floatVec) continue; + rows.push({ + [idColumn]: i, + [embeddingColumn]: floatVec + }); + } + return rows; +}; + +export async function writeLanceDbIndex({ + indexDir, + variant, + vectors, + dims, + modelId, + config, + emitOutput = true, + label = null +}) { + const resolvedConfig = normalizeLanceDbConfig(config); + if (!resolvedConfig.enabled) return { skipped: true, reason: 'disabled' }; + if (!Array.isArray(vectors) || !vectors.length) { + return { skipped: true, reason: 'empty' }; + } + const lancedb = await loadLanceDb(); + if (!lancedb) return { skipped: true, reason: 'missing dependency' }; + + const paths = resolveLanceDbPaths(indexDir); + const target = paths[variant]; + if (!target) return { skipped: true, reason: 'unknown variant' }; + const dir = target.dir; + const metaPath = target.metaPath; + + try { + if (fsSync.existsSync(dir)) { + await fs.rm(dir, { recursive: true, force: true }); + } + } catch {} + + const connect = lancedb.connect || lancedb.default?.connect; + if (typeof connect !== 'function') { + return { skipped: true, reason: 'invalid module' }; + } + + const db = await connect(dir); + const tableName = resolvedConfig.table; + const idColumn = resolvedConfig.idColumn; + const embeddingColumn = resolvedConfig.embeddingColumn; + const batchSize = Math.max(1, Math.floor(resolvedConfig.batchSize || 1024)); + + let table = null; + try { + const firstBatch = buildBatch(vectors, 0, Math.min(batchSize, vectors.length), idColumn, embeddingColumn); + table = await createTable(db, tableName, firstBatch); + if (!table && typeof db.openTable === 'function') { + table = await db.openTable(tableName); + if (firstBatch.length) await addRows(table, firstBatch); + } + for (let start = batchSize; start < vectors.length; start += batchSize) { + const rows = buildBatch( + vectors, + start, + Math.min(start + batchSize, vectors.length), + idColumn, + embeddingColumn + ); + if (rows.length) { + await addRows(table, rows); + } + } + } finally { + if (db?.close) { + await db.close(); + } + } + + const meta = { + version: 1, + generatedAt: new Date().toISOString(), + model: modelId || null, + dims: Number.isFinite(Number(dims)) ? 
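+    // The meta sidecar lets readers sanity-check table shape and provenance
+    // without opening the LanceDB dataset itself.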
Number(dims) : null, + count: vectors.length, + metric: resolvedConfig.metric, + table: tableName, + embeddingColumn, + idColumn + }; + await writeJsonObjectFile(metaPath, { fields: meta, atomic: true }); + + if (emitOutput) { + const targetLabel = label || variant; + console.log(`[embeddings] ${targetLabel}: wrote LanceDB table (${vectors.length} vectors).`); + } + return { skipped: false, count: vectors.length }; +} diff --git a/tools/build-embeddings/manifest.js b/tools/build-embeddings/manifest.js new file mode 100644 index 000000000..b3bbf1a81 --- /dev/null +++ b/tools/build-embeddings/manifest.js @@ -0,0 +1,82 @@ +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import path from 'node:path'; +import { MAX_JSON_BYTES, readJsonFile } from '../../src/shared/artifact-io.js'; +import { writeJsonObjectFile } from '../../src/shared/json-stream.js'; +import { checksumFile } from '../../src/shared/hash.js'; + +export const updatePieceManifest = async ({ indexDir, mode, totalChunks, dims }) => { + const piecesDir = path.join(indexDir, 'pieces'); + const manifestPath = path.join(piecesDir, 'manifest.json'); + let existing = {}; + if (fsSync.existsSync(manifestPath)) { + try { + existing = readJsonFile(manifestPath, { maxBytes: MAX_JSON_BYTES }) || {}; + } catch { + existing = {}; + } + } + const priorPieces = Array.isArray(existing.pieces) ? existing.pieces : []; + const retained = []; + for (const entry of priorPieces) { + if (!entry || entry.type === 'embeddings') continue; + if (entry.path === 'index_state.json') { + const absPath = path.join(indexDir, entry.path.split('/').join(path.sep)); + let bytes = null; + let checksum = null; + let checksumAlgo = null; + try { + const stat = await fs.stat(absPath); + bytes = stat.size; + const result = await checksumFile(absPath); + checksum = result?.value || null; + checksumAlgo = result?.algo || null; + } catch {} + retained.push({ + ...entry, + bytes, + checksum: checksum && checksumAlgo ? `${checksumAlgo}:${checksum}` : null + }); + continue; + } + retained.push(entry); + } + const embeddingPieces = [ + { type: 'embeddings', name: 'dense_vectors', format: 'json', path: 'dense_vectors_uint8.json', count: totalChunks, dims }, + { type: 'embeddings', name: 'dense_vectors_doc', format: 'json', path: 'dense_vectors_doc_uint8.json', count: totalChunks, dims }, + { type: 'embeddings', name: 'dense_vectors_code', format: 'json', path: 'dense_vectors_code_uint8.json', count: totalChunks, dims }, + { type: 'embeddings', name: 'dense_vectors_hnsw', format: 'bin', path: 'dense_vectors_hnsw.bin', count: totalChunks, dims }, + { type: 'embeddings', name: 'dense_vectors_hnsw_meta', format: 'json', path: 'dense_vectors_hnsw.meta.json', count: totalChunks, dims } + ]; + const enriched = []; + for (const entry of embeddingPieces) { + const absPath = path.join(indexDir, entry.path); + if (!fsSync.existsSync(absPath)) continue; + let bytes = null; + let checksum = null; + let checksumAlgo = null; + try { + const stat = await fs.stat(absPath); + bytes = stat.size; + const result = await checksumFile(absPath); + checksum = result?.value || null; + checksumAlgo = result?.algo || null; + } catch {} + enriched.push({ + ...entry, + bytes, + checksum: checksum && checksumAlgo ? 
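+        // Stored as 'algo:hex' so verifiers never have to guess the digest.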
`${checksumAlgo}:${checksum}` : null + }); + } + const now = new Date().toISOString(); + const manifest = { + version: existing.version || 2, + generatedAt: existing.generatedAt || now, + updatedAt: now, + mode, + stage: existing.stage || 'stage3', + pieces: [...retained, ...enriched] + }; + await fs.mkdir(piecesDir, { recursive: true }); + await writeJsonObjectFile(manifestPath, { fields: manifest, atomic: true }); +}; diff --git a/tools/build-embeddings/run.js b/tools/build-embeddings/run.js new file mode 100644 index 000000000..dd76fdab7 --- /dev/null +++ b/tools/build-embeddings/run.js @@ -0,0 +1,593 @@ +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import path from 'node:path'; +import { createEmbedder } from '../../src/index/embedding.js'; +import { validateIndexArtifacts } from '../../src/index/validate.js'; +import { markBuildPhase, resolveBuildStatePath, startBuildHeartbeat } from '../../src/index/build/build-state.js'; +import { loadIncrementalManifest } from '../../src/storage/sqlite/incremental.js'; +import { dequantizeUint8ToFloat32 } from '../../src/storage/sqlite/vector.js'; +import { loadChunkMeta, readJsonFile, MAX_JSON_BYTES } from '../../src/shared/artifact-io.js'; +import { readTextFileWithHash } from '../../src/shared/encoding.js'; +import { writeJsonObjectFile } from '../../src/shared/json-stream.js'; +import { resolveHnswPaths } from '../../src/shared/hnsw.js'; +import { resolveOnnxModelPath } from '../../src/shared/onnx-embeddings.js'; +import { getIndexDir, getRepoCacheRoot } from '../dict-utils.js'; +import { buildCacheIdentity, buildCacheKey, isCacheValid, resolveCacheDir, resolveCacheRoot } from './cache.js'; +import { createTempPath, replaceFileNoBak } from './atomic.js'; +import { buildChunkSignature, buildChunksFromBundles } from './chunks.js'; +import { + buildQuantizedVectors, + createDimsValidator, + ensureVectorArrays, + fillMissingVectors, + isDimsMismatch, + runBatched, + validateCachedDims +} from './embed.js'; +import { createHnswBuilder } from './hnsw.js'; +import { updatePieceManifest } from './manifest.js'; +import { updateSqliteDense } from './sqlite-dense.js'; +import { parseBuildEmbeddingsArgs } from './cli.js'; + +let Database = null; +try { + ({ default: Database } = await import('better-sqlite3')); +} catch {} + +const loadIndexState = (statePath) => { + if (!fsSync.existsSync(statePath)) return {}; + try { + return readJsonFile(statePath, { maxBytes: MAX_JSON_BYTES }) || {}; + } catch { + return {}; + } +}; + +const writeIndexState = async (statePath, state) => { + await writeJsonObjectFile(statePath, { fields: state, atomic: true }); +}; + +export async function runBuildEmbeddings(rawArgs = process.argv.slice(2), _options = {}) { + const config = parseBuildEmbeddingsArgs(rawArgs, _options); + const { + argv, + root, + userConfig, + embeddingsConfig, + embeddingProvider, + embeddingOnnx, + hnswConfig, + normalizedEmbeddingMode, + resolvedEmbeddingMode, + useStubEmbeddings, + embeddingBatchSize, + configuredDims, + modelId, + modelsDir, + indexRoot, + modes + } = config; + + if (embeddingsConfig.enabled === false || resolvedEmbeddingMode === 'off') { + console.error('Embeddings disabled; skipping build-embeddings.'); + return { skipped: true }; + } + + const denseScale = 2 / 255; + const cacheDims = useStubEmbeddings ? (configuredDims || 384) : configuredDims; + const cacheOnnx = embeddingProvider === 'onnx' ? 
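+  // ONNX runtime knobs join the cache identity on purpose: changing threads
+  // or execution providers forces a re-embed rather than trusting old vectors.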
{ + modelPath: resolveOnnxModelPath({ + rootDir: root, + modelPath: embeddingOnnx?.modelPath, + modelsDir, + modelId + }), + tokenizerId: embeddingOnnx?.tokenizerId || modelId || null, + executionProviders: embeddingOnnx?.executionProviders || null, + intraOpNumThreads: embeddingOnnx?.intraOpNumThreads || null, + interOpNumThreads: embeddingOnnx?.interOpNumThreads || null, + graphOptimizationLevel: embeddingOnnx?.graphOptimizationLevel || null + } : null; + const { identity: cacheIdentity, key: cacheIdentityKey } = buildCacheIdentity({ + modelId, + provider: embeddingProvider, + mode: resolvedEmbeddingMode, + stub: useStubEmbeddings, + dims: cacheDims, + scale: denseScale, + preprocess: { + pooling: 'mean', + normalize: true, + truncation: true + }, + quantization: { + version: 1, + minVal: -1, + maxVal: 1, + levels: 256 + }, + onnx: cacheOnnx + }); + + const embedder = createEmbedder({ + rootDir: root, + useStubEmbeddings, + modelId, + dims: argv.dims, + modelsDir, + provider: embeddingProvider, + onnx: embeddingOnnx + }); + const getChunkEmbeddings = embedder.getChunkEmbeddings; + + const repoCacheRoot = getRepoCacheRoot(root, userConfig); + const buildStatePath = resolveBuildStatePath(indexRoot); + const hasBuildState = buildStatePath && fsSync.existsSync(buildStatePath); + const stopHeartbeat = hasBuildState ? startBuildHeartbeat(indexRoot, 'stage3') : () => {}; + + const cacheRoot = resolveCacheRoot({ + repoCacheRoot, + cacheDirConfig: embeddingsConfig.cache?.dir + }); + + if (hasBuildState) { + await markBuildPhase(indexRoot, 'stage3', 'running'); + } + + for (const mode of modes) { + if (!['code', 'prose'].includes(mode)) { + console.error(`Invalid mode: ${mode}`); + process.exit(1); + } + const indexDir = getIndexDir(root, mode, userConfig, { indexRoot }); + const statePath = path.join(indexDir, 'index_state.json'); + const stateNow = new Date().toISOString(); + let indexState = loadIndexState(statePath); + indexState.generatedAt = indexState.generatedAt || stateNow; + indexState.updatedAt = stateNow; + indexState.mode = indexState.mode || mode; + indexState.embeddings = { + ...(indexState.embeddings || {}), + enabled: true, + ready: false, + pending: true, + mode: indexState.embeddings?.mode || resolvedEmbeddingMode, + service: indexState.embeddings?.service ?? (normalizedEmbeddingMode === 'service'), + updatedAt: stateNow + }; + try { + await writeIndexState(statePath, indexState); + } catch { + // Ignore index state write failures. 
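+      // Two-phase handshake (shape sketch, not a schema): the write above
+      // records { embeddings: { enabled: true, ready: false, pending: true } }
+      // and the post-build write flips ready/pending, so readers can treat
+      // pending === true as "dense vectors may be missing or stale".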
+ } + + const chunkMetaPath = path.join(indexDir, 'chunk_meta.json'); + const chunkMetaJsonlPath = path.join(indexDir, 'chunk_meta.jsonl'); + const chunkMetaMetaPath = path.join(indexDir, 'chunk_meta.meta.json'); + const incremental = loadIncrementalManifest(repoCacheRoot, mode); + const manifestFiles = incremental?.manifest?.files || {}; + const hasChunkMeta = fsSync.existsSync(chunkMetaPath) + || fsSync.existsSync(chunkMetaJsonlPath) + || fsSync.existsSync(chunkMetaMetaPath); + + let chunkMeta; + try { + if (hasChunkMeta) { + chunkMeta = loadChunkMeta(indexDir, { maxBytes: MAX_JSON_BYTES }); + } + } catch (err) { + if (err?.code === 'ERR_JSON_TOO_LARGE') { + console.warn(`[embeddings] chunk_meta too large for ${mode}; using incremental bundles if available.`); + } else { + console.warn(`[embeddings] Failed to load chunk_meta for ${mode}: ${err?.message || err}`); + } + chunkMeta = null; + } + + let chunksByFile = new Map(); + let totalChunks = 0; + if (Array.isArray(chunkMeta)) { + const fileMetaPath = path.join(indexDir, 'file_meta.json'); + let fileMeta = []; + if (fsSync.existsSync(fileMetaPath)) { + try { + fileMeta = readJsonFile(fileMetaPath, { maxBytes: MAX_JSON_BYTES }); + } catch (err) { + console.warn(`[embeddings] Failed to read file_meta for ${mode}: ${err?.message || err}`); + fileMeta = []; + } + } + const fileMetaById = new Map(); + if (Array.isArray(fileMeta)) { + for (const entry of fileMeta) { + if (!entry || !Number.isFinite(entry.id)) continue; + fileMetaById.set(entry.id, entry); + } + } + for (let i = 0; i < chunkMeta.length; i += 1) { + const chunk = chunkMeta[i]; + if (!chunk) continue; + const filePath = chunk.file || fileMetaById.get(chunk.fileId)?.file; + if (!filePath) continue; + const list = chunksByFile.get(filePath) || []; + list.push({ index: i, chunk }); + chunksByFile.set(filePath, list); + } + totalChunks = chunkMeta.length; + } else { + if (!manifestFiles || !Object.keys(manifestFiles).length) { + console.warn(`[embeddings] Missing chunk_meta and no incremental bundles for ${mode}; skipping.`); + continue; + } + const bundleResult = await buildChunksFromBundles( + incremental.bundleDir, + manifestFiles, + incremental?.manifest?.bundleFormat + ); + chunksByFile = bundleResult.chunksByFile; + totalChunks = bundleResult.totalChunks; + if (!chunksByFile.size || !totalChunks) { + console.warn(`[embeddings] Incremental bundles empty for ${mode}; skipping.`); + continue; + } + console.log(`[embeddings] ${mode}: using incremental bundles (${chunksByFile.size} files).`); + } + + const codeVectors = new Array(totalChunks).fill(null); + const docVectors = new Array(totalChunks).fill(null); + const mergedVectors = new Array(totalChunks).fill(null); + const { indexPath: hnswIndexPath, metaPath: hnswMetaPath } = resolveHnswPaths(indexDir); + const hnswBuilder = createHnswBuilder({ enabled: hnswConfig.enabled, config: hnswConfig, totalChunks, mode }); + + const cacheDir = resolveCacheDir(cacheRoot, mode); + await fs.mkdir(cacheDir, { recursive: true }); + + const dimsValidator = createDimsValidator({ mode, configuredDims }); + const assertDims = dimsValidator.assertDims; + + if (configuredDims) { + try { + const entries = await fs.readdir(cacheDir); + for (const entry of entries) { + if (!entry.endsWith('.json')) continue; + const cached = JSON.parse(await fs.readFile(path.join(cacheDir, entry), 'utf8')); + if (cached.cacheMeta?.identityKey !== cacheIdentityKey) continue; + const expectedDims = configuredDims || cached.cacheMeta?.identity?.dims || null; + 
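+          // Preflight on purpose: surface dims drift across cached entries
+          // before paying for any embedding work.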
validateCachedDims({ vectors: cached.codeVectors, expectedDims, mode }); + validateCachedDims({ vectors: cached.docVectors, expectedDims, mode }); + validateCachedDims({ vectors: cached.mergedVectors, expectedDims, mode }); + } + } catch (err) { + if (isDimsMismatch(err)) throw err; + // Ignore cache preflight errors. + } + } + + let processedFiles = 0; + for (const [relPath, itemsRaw] of chunksByFile.entries()) { + // Ensure stable mapping between chunkSignature, cache vectors, and HNSW insertion. + const items = Array.isArray(itemsRaw) + ? [...itemsRaw].sort((a, b) => (a.index ?? 0) - (b.index ?? 0)) + : []; + const normalizedRel = relPath.replace(/\\/g, '/'); + const chunkSignature = buildChunkSignature(items); + const manifestEntry = manifestFiles[normalizedRel] || null; + const manifestHash = typeof manifestEntry?.hash === 'string' ? manifestEntry.hash : null; + let fileHash = manifestHash; + let cacheKey = buildCacheKey({ + file: normalizedRel, + hash: fileHash, + signature: chunkSignature, + identityKey: cacheIdentityKey + }); + let cachePath = cacheKey ? path.join(cacheDir, `${cacheKey}.json`) : null; + + if (cachePath && fsSync.existsSync(cachePath)) { + try { + const cached = JSON.parse(await fs.readFile(cachePath, 'utf8')); + const cacheIdentityMatches = cached.cacheMeta?.identityKey === cacheIdentityKey; + if (cacheIdentityMatches) { + const expectedDims = configuredDims || cached.cacheMeta?.identity?.dims || null; + validateCachedDims({ vectors: cached.codeVectors, expectedDims, mode }); + validateCachedDims({ vectors: cached.docVectors, expectedDims, mode }); + validateCachedDims({ vectors: cached.mergedVectors, expectedDims, mode }); + } + if (isCacheValid({ cached, signature: chunkSignature, identityKey: cacheIdentityKey })) { + const cachedCode = ensureVectorArrays(cached.codeVectors, items.length); + const cachedDoc = ensureVectorArrays(cached.docVectors, items.length); + const cachedMerged = ensureVectorArrays(cached.mergedVectors, items.length); + for (let i = 0; i < items.length; i += 1) { + const chunkIndex = items[i].index; + const codeVec = cachedCode[i] || []; + const docVec = cachedDoc[i] || []; + const mergedVec = cachedMerged[i] || []; + if (codeVec.length) assertDims(codeVec.length); + if (docVec.length) assertDims(docVec.length); + if (mergedVec.length) assertDims(mergedVec.length); + codeVectors[chunkIndex] = codeVec; + docVectors[chunkIndex] = docVec; + mergedVectors[chunkIndex] = mergedVec; + if (hnswConfig.enabled && mergedVec.length) { + const floatVec = dequantizeUint8ToFloat32(mergedVec); + if (floatVec) hnswBuilder.addVector(chunkIndex, floatVec); + } + } + processedFiles += 1; + continue; + } + } catch (err) { + if (isDimsMismatch(err)) throw err; + // Ignore cache parse errors. + } + } + + const absPath = path.resolve(root, normalizedRel.split('/').join(path.sep)); + let textInfo; + try { + textInfo = await readTextFileWithHash(absPath); + } catch { + console.warn(`[embeddings] Failed to read ${normalizedRel}; skipping.`); + continue; + } + const text = textInfo.text; + if (!fileHash) { + fileHash = textInfo.hash; + cacheKey = buildCacheKey({ + file: normalizedRel, + hash: fileHash, + signature: chunkSignature, + identityKey: cacheIdentityKey + }); + cachePath = cacheKey ? 
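+      // The cache location only becomes derivable once the on-disk hash is
+      // known, so re-check for a hit before embedding.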
path.join(cacheDir, `${cacheKey}.json`) : null; + if (cachePath && fsSync.existsSync(cachePath)) { + try { + const cached = JSON.parse(await fs.readFile(cachePath, 'utf8')); + const cacheIdentityMatches = cached.cacheMeta?.identityKey === cacheIdentityKey; + if (cacheIdentityMatches) { + const expectedDims = configuredDims || cached.cacheMeta?.identity?.dims || null; + validateCachedDims({ vectors: cached.codeVectors, expectedDims, mode }); + validateCachedDims({ vectors: cached.docVectors, expectedDims, mode }); + validateCachedDims({ vectors: cached.mergedVectors, expectedDims, mode }); + } + if (isCacheValid({ cached, signature: chunkSignature, identityKey: cacheIdentityKey })) { + const cachedCode = ensureVectorArrays(cached.codeVectors, items.length); + const cachedDoc = ensureVectorArrays(cached.docVectors, items.length); + const cachedMerged = ensureVectorArrays(cached.mergedVectors, items.length); + for (let i = 0; i < items.length; i += 1) { + const chunkIndex = items[i].index; + const codeVec = cachedCode[i] || []; + const docVec = cachedDoc[i] || []; + const mergedVec = cachedMerged[i] || []; + if (codeVec.length) assertDims(codeVec.length); + if (docVec.length) assertDims(docVec.length); + if (mergedVec.length) assertDims(mergedVec.length); + codeVectors[chunkIndex] = codeVec; + docVectors[chunkIndex] = docVec; + mergedVectors[chunkIndex] = mergedVec; + if (hnswConfig.enabled && mergedVec.length) { + const floatVec = dequantizeUint8ToFloat32(mergedVec); + if (floatVec) hnswBuilder.addVector(chunkIndex, floatVec); + } + } + processedFiles += 1; + continue; + } + } catch (err) { + if (isDimsMismatch(err)) throw err; + // Ignore cache parse errors. + } + } + } + + const codeTexts = []; + const docTexts = []; + for (const { chunk } of items) { + const start = Number(chunk.start) || 0; + const end = Number(chunk.end) || start; + codeTexts.push(text.slice(start, end)); + const docText = typeof chunk.docmeta?.doc === 'string' ? chunk.docmeta.doc : ''; + docTexts.push(docText.trim() ? docText : ''); + } + + let codeEmbeds = await runBatched({ + texts: codeTexts, + batchSize: embeddingBatchSize, + embed: getChunkEmbeddings + }); + codeEmbeds = ensureVectorArrays(codeEmbeds, codeTexts.length); + for (const vec of codeEmbeds) { + if (Array.isArray(vec) && vec.length) assertDims(vec.length); + } + + const docVectorsRaw = new Array(items.length).fill(null); + const docIndexes = []; + const docPayloads = []; + for (let i = 0; i < docTexts.length; i += 1) { + if (docTexts[i]) { + docIndexes.push(i); + docPayloads.push(docTexts[i]); + } + } + if (docPayloads.length) { + const embeddedDocs = await runBatched({ + texts: docPayloads, + batchSize: embeddingBatchSize, + embed: getChunkEmbeddings + }); + for (let i = 0; i < docIndexes.length; i += 1) { + docVectorsRaw[docIndexes[i]] = embeddedDocs[i] || null; + } + } + for (const vec of docVectorsRaw) { + if (Array.isArray(vec) && vec.length) assertDims(vec.length); + } + + const dims = dimsValidator.getDims(); + const zeroVec = dims ? Array.from({ length: dims }, () => 0) : []; + + const cachedCodeVectors = []; + const cachedDocVectors = []; + const cachedMergedVectors = []; + for (let i = 0; i < items.length; i += 1) { + const chunkIndex = items[i].index; + const embedCode = Array.isArray(codeEmbeds[i]) ? codeEmbeds[i] : []; + const embedDoc = Array.isArray(docVectorsRaw[i]) ? 
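+      // Chunks without doc text reuse the zero vector so the merged mean
+      // stays well-defined.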
docVectorsRaw[i] : zeroVec; + const quantized = buildQuantizedVectors({ + chunkIndex, + codeVector: embedCode, + docVector: embedDoc, + zeroVector: zeroVec, + addHnswVector: hnswConfig.enabled ? hnswBuilder.addVector : null + }); + codeVectors[chunkIndex] = quantized.quantizedCode; + docVectors[chunkIndex] = quantized.quantizedDoc; + mergedVectors[chunkIndex] = quantized.quantizedMerged; + cachedCodeVectors.push(quantized.quantizedCode); + cachedDocVectors.push(quantized.quantizedDoc); + cachedMergedVectors.push(quantized.quantizedMerged); + } + + if (cachePath) { + const payload = JSON.stringify({ + key: cacheKey, + file: normalizedRel, + hash: fileHash, + chunkSignature, + cacheMeta: { + identityKey: cacheIdentityKey, + identity: cacheIdentity, + createdAt: new Date().toISOString() + }, + codeVectors: cachedCodeVectors, + docVectors: cachedDocVectors, + mergedVectors: cachedMergedVectors + }); + let tempPath; + try { + tempPath = createTempPath(cachePath); + await fs.writeFile(tempPath, payload); + await replaceFileNoBak(tempPath, cachePath); + } catch { + // Ignore cache write failures. + if (tempPath) { + try { + await fs.rm(tempPath, { force: true }); + } catch {} + } + } + } + + processedFiles += 1; + if (processedFiles % 50 === 0) { + console.log(`[embeddings] ${mode}: processed ${processedFiles}/${chunksByFile.size} files`); + } + } + + const observedDims = dimsValidator.getDims(); + if (configuredDims && observedDims && configuredDims !== observedDims) { + throw new Error( + `[embeddings] ${mode} embedding dims mismatch (configured=${configuredDims}, observed=${observedDims}).` + ); + } + const finalDims = observedDims || configuredDims || 384; + fillMissingVectors(codeVectors, finalDims); + fillMissingVectors(docVectors, finalDims); + fillMissingVectors(mergedVectors, finalDims); + + await writeJsonObjectFile(path.join(indexDir, 'dense_vectors_uint8.json'), { + fields: { model: modelId, dims: finalDims, scale: denseScale }, + arrays: { vectors: mergedVectors }, + atomic: true + }); + await writeJsonObjectFile(path.join(indexDir, 'dense_vectors_doc_uint8.json'), { + fields: { model: modelId, dims: finalDims, scale: denseScale }, + arrays: { vectors: docVectors }, + atomic: true + }); + await writeJsonObjectFile(path.join(indexDir, 'dense_vectors_code_uint8.json'), { + fields: { model: modelId, dims: finalDims, scale: denseScale }, + arrays: { vectors: codeVectors }, + atomic: true + }); + + if (hnswConfig.enabled) { + try { + const result = await hnswBuilder.writeIndex({ + indexPath: hnswIndexPath, + metaPath: hnswMetaPath, + modelId, + dims: finalDims + }); + if (!result.skipped) { + console.log(`[embeddings] ${mode}: wrote HNSW index (${result.count} vectors).`); + } + } catch (err) { + console.warn(`[embeddings] ${mode}: failed to write HNSW index: ${err?.message || err}`); + } + } + + const now = new Date().toISOString(); + indexState.generatedAt = indexState.generatedAt || now; + indexState.updatedAt = now; + indexState.mode = indexState.mode || mode; + indexState.embeddings = { + ...(indexState.embeddings || {}), + enabled: true, + ready: true, + pending: false, + mode: indexState.embeddings?.mode || resolvedEmbeddingMode, + service: indexState.embeddings?.service ?? 
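+      // Preserve a service flag recorded by an earlier run; derive it from
+      // config only on first write.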
(normalizedEmbeddingMode === 'service'), + updatedAt: now + }; + if (indexState.enrichment && indexState.enrichment.enabled) { + indexState.enrichment = { + ...indexState.enrichment, + pending: false, + stage: indexState.enrichment.stage || indexState.stage || 'stage2' + }; + } + try { + await writeIndexState(statePath, indexState); + } catch { + // Ignore index state write failures. + } + + try { + await updatePieceManifest({ indexDir, mode, totalChunks, dims: finalDims }); + } catch { + // Ignore piece manifest write failures. + } + + updateSqliteDense({ + Database, + root, + userConfig, + indexRoot, + mode, + vectors: mergedVectors, + dims: finalDims, + scale: denseScale, + modelId, + emitOutput: true + }); + + const validation = await validateIndexArtifacts({ + root, + indexRoot, + modes: [mode], + userConfig, + sqliteEnabled: false + }); + if (!validation.ok) { + throw new Error(`[embeddings] ${mode} index validation failed; see index-validate output for details.`); + } + + console.log(`[embeddings] ${mode}: wrote ${totalChunks} vectors (dims=${finalDims}).`); + } + + if (hasBuildState) { + await markBuildPhase(indexRoot, 'stage3', 'done'); + } + stopHeartbeat(); + return { modes }; +} diff --git a/tools/build-embeddings/sqlite-dense.js b/tools/build-embeddings/sqlite-dense.js new file mode 100644 index 000000000..43fe8d65d --- /dev/null +++ b/tools/build-embeddings/sqlite-dense.js @@ -0,0 +1,133 @@ +import fsSync from 'node:fs'; +import path from 'node:path'; +import { + encodeVector, + ensureVectorTable, + getVectorExtensionConfig, + hasVectorTable, + loadVectorExtension +} from '../vector-extension.js'; +import { resolveSqlitePaths } from '../dict-utils.js'; +import { dequantizeUint8ToFloat32, packUint8, toVectorId } from '../../src/storage/sqlite/vector.js'; + +const hasTable = (db, table) => { + try { + const row = db.prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name = ?" + ).get(table); + return !!row; + } catch { + return false; + } +}; + +export const updateSqliteDense = ({ + Database, + root, + userConfig, + indexRoot, + mode, + vectors, + dims, + scale, + modelId, + dbPath, + emitOutput = true +}) => { + if (userConfig?.sqlite?.use === false) { + return { skipped: true, reason: 'sqlite disabled' }; + } + if (!Database) { + if (emitOutput) { + console.warn(`[embeddings] better-sqlite3 not available; skipping SQLite update for ${mode}.`); + } + return { skipped: true, reason: 'sqlite unavailable' }; + } + const resolvedDbPath = dbPath || (() => { + const sqlitePaths = resolveSqlitePaths(root, userConfig, indexRoot ? { indexRoot } : {}); + return mode === 'code' ? 
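+    // Fall back to the per-mode database path resolved from repo config.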
sqlitePaths.codePath : sqlitePaths.prosePath; + })(); + if (!resolvedDbPath || !fsSync.existsSync(resolvedDbPath)) { + if (emitOutput) { + console.warn(`[embeddings] SQLite ${mode} index missing; skipping.`); + } + return { skipped: true, reason: 'sqlite missing' }; + } + + const db = new Database(resolvedDbPath); + try { + if (!hasTable(db, 'dense_vectors') || !hasTable(db, 'dense_meta')) { + if (emitOutput) { + console.warn(`[embeddings] SQLite ${mode} index missing dense tables; skipping.`); + } + return { skipped: true, reason: 'missing dense tables' }; + } + try { + db.pragma('journal_mode = WAL'); + db.pragma('synchronous = NORMAL'); + } catch {} + + const vectorExtension = getVectorExtensionConfig(root, userConfig); + let vectorAnnReady = false; + let vectorAnnTable = vectorExtension.table || 'dense_vectors_ann'; + let vectorAnnColumn = vectorExtension.column || 'embedding'; + let insertVectorAnn = null; + if (vectorExtension.enabled) { + const loadResult = loadVectorExtension(db, vectorExtension, `embeddings ${mode}`); + if (loadResult.ok) { + if (hasVectorTable(db, vectorAnnTable)) { + vectorAnnReady = true; + } else { + const created = ensureVectorTable(db, vectorExtension, dims); + if (created.ok) { + vectorAnnReady = true; + vectorAnnTable = created.tableName; + vectorAnnColumn = created.column; + } else if (emitOutput) { + console.warn(`[embeddings] Failed to create vector table for ${mode}: ${created.reason}`); + } + } + if (vectorAnnReady) { + insertVectorAnn = db.prepare( + `INSERT OR REPLACE INTO ${vectorAnnTable} (rowid, ${vectorAnnColumn}) VALUES (?, ?)` + ); + } + } else if (emitOutput) { + console.warn(`[embeddings] Vector extension unavailable for ${mode}: ${loadResult.reason}`); + } + } + + const deleteDense = db.prepare('DELETE FROM dense_vectors WHERE mode = ?'); + const deleteMeta = db.prepare('DELETE FROM dense_meta WHERE mode = ?'); + const insertDense = db.prepare( + 'INSERT OR REPLACE INTO dense_vectors (mode, doc_id, vector) VALUES (?, ?, ?)' + ); + const insertMeta = db.prepare( + 'INSERT OR REPLACE INTO dense_meta (mode, dims, scale, model) VALUES (?, ?, ?, ?)' + ); + const run = db.transaction(() => { + deleteDense.run(mode); + deleteMeta.run(mode); + if (vectorAnnReady) { + db.exec(`DELETE FROM ${vectorAnnTable}`); + } + insertMeta.run(mode, dims, scale, modelId || null); + for (let docId = 0; docId < vectors.length; docId += 1) { + const vec = vectors[docId]; + insertDense.run(mode, docId, packUint8(vec)); + if (vectorAnnReady && insertVectorAnn) { + const floatVec = dequantizeUint8ToFloat32(vec); + const encoded = encodeVector(floatVec, vectorExtension); + if (encoded) insertVectorAnn.run(toVectorId(docId), encoded); + } + } + }); + run(); + if (emitOutput) { + console.log(`[embeddings] ${mode}: SQLite dense vectors updated (${resolvedDbPath}).`); + } + return { skipped: false, count: vectors.length }; + } finally { + db.close(); + } +}; diff --git a/tools/build-lmdb-index.js b/tools/build-lmdb-index.js new file mode 100644 index 000000000..73904c967 --- /dev/null +++ b/tools/build-lmdb-index.js @@ -0,0 +1,270 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import path from 'node:path'; +import { createCli } from '../src/shared/cli.js'; +import { loadChunkMeta, loadTokenPostings, readJsonFile, MAX_JSON_BYTES } from '../src/shared/artifact-io.js'; +import { writeJsonObjectFile } from '../src/shared/json-stream.js'; +import { checksumFile } from '../src/shared/hash.js'; +import { LMDB_ARTIFACT_KEYS, 
LMDB_META_KEYS, LMDB_SCHEMA_VERSION } from '../src/storage/lmdb/schema.js'; +import { getIndexDir, getMetricsDir, loadUserConfig, resolveIndexRoot, resolveLmdbPaths, resolveRepoRoot } from './dict-utils.js'; +import { Packr } from 'msgpackr'; + +let open = null; +try { + ({ open } = await import('lmdb')); +} catch {} + +const argv = createCli({ + scriptName: 'build-lmdb-index', + options: { + mode: { type: 'string', default: 'all' }, + repo: { type: 'string' }, + 'index-root': { type: 'string' } + } +}).parse(); + +if (!open) { + console.error('lmdb is required. Run npm install first.'); + process.exit(1); +} + +const rootArg = argv.repo ? path.resolve(argv.repo) : null; +const root = rootArg || resolveRepoRoot(process.cwd()); +const userConfig = loadUserConfig(root); +const indexRoot = argv['index-root'] + ? path.resolve(argv['index-root']) + : resolveIndexRoot(root, userConfig); +const lmdbPaths = resolveLmdbPaths(root, userConfig, { indexRoot }); +const metricsDir = getMetricsDir(root, userConfig); + +const readJsonOptional = (filePath) => { + if (!filePath || !fsSync.existsSync(filePath)) return null; + return readJsonFile(filePath, { maxBytes: MAX_JSON_BYTES }); +}; + +const sumDocLengths = (docLengths) => { + if (!Array.isArray(docLengths)) return null; + let total = 0; + for (const entry of docLengths) { + const value = Number(entry); + if (Number.isFinite(value)) total += value; + } + return total; +}; + +const updateIndexStateManifest = async (indexDir) => { + const manifestPath = path.join(indexDir, 'pieces', 'manifest.json'); + if (!fsSync.existsSync(manifestPath)) return; + let manifest = null; + try { + manifest = readJsonFile(manifestPath) || null; + } catch { + return; + } + if (!manifest || !Array.isArray(manifest.pieces)) return; + const statePath = path.join(indexDir, 'index_state.json'); + if (!fsSync.existsSync(statePath)) return; + let bytes = null; + let checksum = null; + let checksumAlgo = null; + try { + const stat = await fs.stat(statePath); + bytes = stat.size; + const result = await checksumFile(statePath); + checksum = result?.value || null; + checksumAlgo = result?.algo || null; + } catch {} + if (!bytes || !checksum) return; + const pieces = manifest.pieces.map((piece) => { + if (piece?.name !== 'index_state' || piece?.path !== 'index_state.json') { + return piece; + } + return { + ...piece, + bytes, + checksum: checksum && checksumAlgo ? `${checksumAlgo}:${checksum}` : piece.checksum + }; + }); + const next = { + ...manifest, + updatedAt: new Date().toISOString(), + pieces + }; + try { + await writeJsonObjectFile(manifestPath, { fields: next, atomic: true }); + } catch { + // Ignore manifest write failures. + } +}; + +const updateLmdbState = async (indexDir, patch) => { + if (!indexDir) return null; + const statePath = path.join(indexDir, 'index_state.json'); + let state = {}; + if (fsSync.existsSync(statePath)) { + try { + state = readJsonFile(statePath, { maxBytes: MAX_JSON_BYTES }) || {}; + } catch { + state = {}; + } + } + const now = new Date().toISOString(); + state.generatedAt = state.generatedAt || now; + state.updatedAt = now; + state.lmdb = { + ...(state.lmdb || {}), + ...patch, + updatedAt: now + }; + try { + await writeJsonObjectFile(statePath, { fields: state, atomic: true }); + } catch { + // Ignore index state write failures. + } + await updateIndexStateManifest(indexDir); + return state; +}; + +const buildModeRaw = String(argv.mode || 'all').trim().toLowerCase(); +const buildMode = buildModeRaw === 'both' ? 
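+// Accept the legacy '--mode both' spelling as an alias for 'all'.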
'all' : buildModeRaw; +const modes = buildMode === 'all' ? ['code', 'prose'] : [buildMode]; + +const packr = new Packr(); + +const storeValue = (db, key, value) => { + if (value == null) return false; + db.putSync(key, packr.pack(value)); + return true; +}; + +const storeArtifacts = (db, meta, artifacts) => { + db.clearSync(); + db.transactionSync(() => { + storeValue(db, LMDB_META_KEYS.schemaVersion, LMDB_SCHEMA_VERSION); + storeValue(db, LMDB_META_KEYS.createdAt, meta.createdAt); + storeValue(db, LMDB_META_KEYS.mode, meta.mode); + storeValue(db, LMDB_META_KEYS.sourceIndex, meta.sourceIndex); + storeValue(db, LMDB_META_KEYS.chunkCount, meta.chunkCount); + storeValue(db, LMDB_META_KEYS.artifacts, meta.artifacts); + for (const [key, value] of Object.entries(artifacts)) { + storeValue(db, key, value); + } + }); +}; + +const loadArtifactsForMode = (indexDir, mode) => { + const chunkMeta = loadChunkMeta(indexDir, { maxBytes: MAX_JSON_BYTES }); + const tokenPostings = loadTokenPostings(indexDir, { maxBytes: MAX_JSON_BYTES }); + const fileMeta = readJsonOptional(path.join(indexDir, 'file_meta.json')); + const fileRelations = readJsonOptional(path.join(indexDir, 'file_relations.json')); + const repoMap = readJsonOptional(path.join(indexDir, 'repo_map.json')); + const filterIndex = readJsonOptional(path.join(indexDir, 'filter_index.json')); + const fieldPostings = readJsonOptional(path.join(indexDir, 'field_postings.json')); + const fieldTokens = readJsonOptional(path.join(indexDir, 'field_tokens.json')); + const phraseNgrams = readJsonOptional(path.join(indexDir, 'phrase_ngrams.json')); + const chargramPostings = readJsonOptional(path.join(indexDir, 'chargram_postings.json')); + const minhashSignatures = readJsonOptional(path.join(indexDir, 'minhash_signatures.json')); + const denseVectors = readJsonOptional(path.join(indexDir, 'dense_vectors_uint8.json')); + const denseVectorsDoc = readJsonOptional(path.join(indexDir, 'dense_vectors_doc_uint8.json')); + const denseVectorsCode = readJsonOptional(path.join(indexDir, 'dense_vectors_code_uint8.json')); + const denseHnswMeta = readJsonOptional(path.join(indexDir, 'dense_vectors_hnsw.meta.json')); + const indexState = readJsonOptional(path.join(indexDir, 'index_state.json')); + const artifacts = { + [LMDB_ARTIFACT_KEYS.chunkMeta]: chunkMeta, + [LMDB_ARTIFACT_KEYS.tokenPostings]: tokenPostings, + [LMDB_ARTIFACT_KEYS.fileMeta]: fileMeta, + [LMDB_ARTIFACT_KEYS.fileRelations]: fileRelations, + [LMDB_ARTIFACT_KEYS.repoMap]: repoMap, + [LMDB_ARTIFACT_KEYS.filterIndex]: filterIndex, + [LMDB_ARTIFACT_KEYS.fieldPostings]: fieldPostings, + [LMDB_ARTIFACT_KEYS.fieldTokens]: fieldTokens, + [LMDB_ARTIFACT_KEYS.phraseNgrams]: phraseNgrams, + [LMDB_ARTIFACT_KEYS.chargramPostings]: chargramPostings, + [LMDB_ARTIFACT_KEYS.minhashSignatures]: minhashSignatures, + [LMDB_ARTIFACT_KEYS.denseVectors]: denseVectors, + [LMDB_ARTIFACT_KEYS.denseVectorsDoc]: denseVectorsDoc, + [LMDB_ARTIFACT_KEYS.denseVectorsCode]: denseVectorsCode, + [LMDB_ARTIFACT_KEYS.denseHnswMeta]: denseHnswMeta, + [LMDB_ARTIFACT_KEYS.indexState]: indexState + }; + const artifactKeys = Object.entries(artifacts) + .filter(([, value]) => value != null) + .map(([key]) => key); + const meta = { + createdAt: new Date().toISOString(), + mode, + sourceIndex: indexDir, + chunkCount: Array.isArray(chunkMeta) ? chunkMeta.length : 0, + artifacts: artifactKeys + }; + const stats = { + chunkCount: meta.chunkCount, + fileCount: Array.isArray(fileMeta) ? 
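+    // Best-effort stats: absent artifacts report null rather than zero.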
fileMeta.length : null, + tokenCount: sumDocLengths(tokenPostings?.docLengths) + }; + return { meta, artifacts, stats }; +}; + +for (const mode of modes) { + if (!['code', 'prose'].includes(mode)) { + console.error(`Invalid mode: ${mode}`); + process.exit(1); + } + const indexDir = getIndexDir(root, mode, userConfig, { indexRoot }); + const targetPath = mode === 'code' ? lmdbPaths.codePath : lmdbPaths.prosePath; + const buildStart = Date.now(); + await fs.mkdir(targetPath, { recursive: true }); + await updateLmdbState(indexDir, { + enabled: true, + ready: false, + pending: true, + schemaVersion: LMDB_SCHEMA_VERSION + }); + + const readStart = Date.now(); + const { meta, artifacts, stats } = loadArtifactsForMode(indexDir, mode); + const readMs = Date.now() - readStart; + const writeStart = Date.now(); + const db = open({ path: targetPath, readOnly: false }); + storeArtifacts(db, meta, artifacts); + db.close(); + const writeMs = Date.now() - writeStart; + + const finalState = await updateLmdbState(indexDir, { + enabled: true, + ready: true, + pending: false, + schemaVersion: LMDB_SCHEMA_VERSION, + path: targetPath + }); + const finalDb = open({ path: targetPath, readOnly: false }); + storeValue(finalDb, LMDB_ARTIFACT_KEYS.indexState, finalState); + finalDb.close(); + + const totalMs = Date.now() - buildStart; + const metrics = { + generatedAt: new Date().toISOString(), + mode, + sourceIndex: meta.sourceIndex, + artifacts: meta.artifacts, + files: { candidates: stats.fileCount }, + chunks: { total: stats.chunkCount }, + tokens: { total: stats.tokenCount }, + lmdb: { path: targetPath }, + timings: { + totalMs, + readMs, + writeMs + } + }; + try { + await fs.mkdir(metricsDir, { recursive: true }); + await writeJsonObjectFile( + path.join(metricsDir, `lmdb-${mode}.json`), + { fields: metrics, atomic: true } + ); + } catch {} + + console.log(`[lmdb] ${mode} index built at ${targetPath}.`); +} diff --git a/tools/build-sqlite-index.js b/tools/build-sqlite-index.js index 757a03711..f98ee163c 100644 --- a/tools/build-sqlite-index.js +++ b/tools/build-sqlite-index.js @@ -1,1001 +1,12 @@ #!/usr/bin/env node -import fs from 'node:fs/promises'; -import fsSync from 'node:fs'; -import path from 'node:path'; -import minimist from 'minimist'; -import { getIndexDir, getModelConfig, getRepoCacheRoot, loadUserConfig, resolveRepoRoot, resolveSqlitePaths } from './dict-utils.js'; -import { encodeVector, ensureVectorTable, getVectorExtensionConfig, hasVectorTable, loadVectorExtension } from './vector-extension.js'; -import { compactDatabase } from './compact-sqlite-index.js'; -import { CREATE_TABLES_SQL, REQUIRED_TABLES, SCHEMA_VERSION } from '../src/sqlite/schema.js'; -import { buildChunkRow, buildTokenFrequency, prepareVectorAnnTable } from '../src/sqlite/build-helpers.js'; -import { loadIncrementalManifest } from '../src/sqlite/incremental.js'; -import { chunkArray, hasRequiredTables, loadIndex, normalizeFilePath, readJson } from '../src/sqlite/utils.js'; -import { dequantizeUint8ToFloat32, packUint32, packUint8, quantizeVec, toVectorId } from '../src/sqlite/vector.js'; +import { fileURLToPath } from 'node:url'; +import { runBuildSqliteIndex } from './build-sqlite-index/run.js'; -let Database; -try { - ({ default: Database } = await import('better-sqlite3')); -} catch (err) { - console.error('better-sqlite3 is required. 
Run npm install first.'); - process.exit(1); -} - -const argv = minimist(process.argv.slice(2), { - string: ['code-dir', 'prose-dir', 'out', 'mode', 'repo'], - boolean: ['incremental', 'compact'], - default: { mode: 'all', incremental: false, compact: false } -}); - -const rootArg = argv.repo ? path.resolve(argv.repo) : null; -const root = rootArg || resolveRepoRoot(process.cwd()); -const userConfig = loadUserConfig(root); -const modelConfig = getModelConfig(root, userConfig); -const vectorExtension = getVectorExtensionConfig(root, userConfig); -const vectorAnnEnabled = vectorExtension.enabled; -const vectorConfig = { - enabled: vectorAnnEnabled, - extension: vectorExtension, - loadVectorExtension, - ensureVectorTable -}; -const repoCacheRoot = getRepoCacheRoot(root, userConfig); -const compactFlag = argv.compact; -const compactOnIncremental = compactFlag === true - || (compactFlag !== false && userConfig?.sqlite?.compactOnIncremental === true); -const codeDir = argv['code-dir'] ? path.resolve(argv['code-dir']) : getIndexDir(root, 'code', userConfig); -const proseDir = argv['prose-dir'] ? path.resolve(argv['prose-dir']) : getIndexDir(root, 'prose', userConfig); -const sqlitePaths = resolveSqlitePaths(root, userConfig); -const incrementalRequested = argv.incremental === true; - -const modeArg = (argv.mode || 'all').toLowerCase(); -if (!['all', 'code', 'prose'].includes(modeArg)) { - console.error('Invalid mode. Use --mode all|code|prose'); - process.exit(1); -} - -const outArg = argv.out ? path.resolve(argv.out) : null; -let outPath = null; -let codeOutPath = sqlitePaths.codePath; -let proseOutPath = sqlitePaths.prosePath; -if (outArg) { - if (modeArg === 'all') { - const outDir = outArg.endsWith('.db') ? path.dirname(outArg) : outArg; - codeOutPath = path.join(outDir, 'index-code.db'); - proseOutPath = path.join(outDir, 'index-prose.db'); - } else { - const targetName = modeArg === 'code' ? 'index-code.db' : 'index-prose.db'; - outPath = outArg.endsWith('.db') ? outArg : path.join(outArg, targetName); - } -} -if (!outPath && modeArg !== 'all') { - outPath = modeArg === 'code' ? codeOutPath : proseOutPath; -} - -if (modeArg === 'all') { - await fs.mkdir(path.dirname(codeOutPath), { recursive: true }); - await fs.mkdir(path.dirname(proseOutPath), { recursive: true }); -} else if (outPath) { - await fs.mkdir(path.dirname(outPath), { recursive: true }); -} - - - -const codeIndex = loadIndex(codeDir, modelConfig.id); -const proseIndex = loadIndex(proseDir, modelConfig.id); -const incrementalCode = loadIncrementalManifest(repoCacheRoot, 'code'); -const incrementalProse = loadIncrementalManifest(repoCacheRoot, 'prose'); -if (!codeIndex && !proseIndex) { - console.error('No index found. 
Build index-code/index-prose first.'); - process.exit(1); -} - -if (sqlitePaths.legacyExists) { - try { - await fs.rm(sqlitePaths.legacyPath, { force: true }); - console.warn(`Removed legacy SQLite index at ${sqlitePaths.legacyPath}`); - } catch (err) { - console.warn(`Failed to remove legacy SQLite index at ${sqlitePaths.legacyPath}: ${err?.message || err}`); - } -} - -const canIncrementalCode = incrementalRequested && incrementalCode?.manifest; -const canIncrementalProse = incrementalRequested && incrementalProse?.manifest; -if (modeArg === 'code' && !codeIndex && !canIncrementalCode) { - console.error('Code index missing; build index-code first.'); - process.exit(1); -} -if (modeArg === 'prose' && !proseIndex && !canIncrementalProse) { - console.error('Prose index missing; build index-prose first.'); - process.exit(1); -} - - -/** - * Build a full SQLite index from file-backed artifacts. - * @param {string} outPath - * @param {object} index - * @param {'code'|'prose'} mode - * @param {object|null} manifestFiles - * @returns {number} - */ -function buildDatabase(outPath, index, mode, manifestFiles) { - if (!index) return 0; - const db = new Database(outPath); - try { - db.pragma('journal_mode = WAL'); - db.pragma('synchronous = NORMAL'); - } catch {} - - db.exec(CREATE_TABLES_SQL); - db.pragma(`user_version = ${SCHEMA_VERSION}`); - const vectorAnn = prepareVectorAnnTable({ db, indexData: index, mode, vectorConfig }); - - const insertChunk = db.prepare(` - INSERT OR REPLACE INTO chunks ( - id, mode, file, start, end, startLine, endLine, ext, kind, name, headline, - preContext, postContext, weight, tokens, ngrams, codeRelations, docmeta, - stats, complexity, lint, externalDocs, last_modified, last_author, churn, - chunk_authors - ) VALUES ( - @id, @mode, @file, @start, @end, @startLine, @endLine, @ext, @kind, @name, @headline, - @preContext, @postContext, @weight, @tokens, @ngrams, @codeRelations, @docmeta, - @stats, @complexity, @lint, @externalDocs, @last_modified, @last_author, @churn, - @chunk_authors - ); - `); - - const insertFts = db.prepare(` - INSERT OR REPLACE INTO chunks_fts (rowid, mode, file, name, kind, headline, tokens) - VALUES (@id, @mode, @file, @name, @kind, @headline, @tokensText); - `); - - const insertTokenVocab = db.prepare( - 'INSERT OR REPLACE INTO token_vocab (mode, token_id, token) VALUES (?, ?, ?)' - ); - const insertTokenPosting = db.prepare( - 'INSERT OR REPLACE INTO token_postings (mode, token_id, doc_id, tf) VALUES (?, ?, ?, ?)' - ); - const insertDocLength = db.prepare( - 'INSERT OR REPLACE INTO doc_lengths (mode, doc_id, len) VALUES (?, ?, ?)' - ); - const insertTokenStats = db.prepare( - 'INSERT OR REPLACE INTO token_stats (mode, avg_doc_len, total_docs) VALUES (?, ?, ?)' - ); - const insertPhraseVocab = db.prepare( - 'INSERT OR REPLACE INTO phrase_vocab (mode, phrase_id, ngram) VALUES (?, ?, ?)' - ); - const insertPhrasePosting = db.prepare( - 'INSERT OR REPLACE INTO phrase_postings (mode, phrase_id, doc_id) VALUES (?, ?, ?)' - ); - const insertChargramVocab = db.prepare( - 'INSERT OR REPLACE INTO chargram_vocab (mode, gram_id, gram) VALUES (?, ?, ?)' - ); - const insertChargramPosting = db.prepare( - 'INSERT OR REPLACE INTO chargram_postings (mode, gram_id, doc_id) VALUES (?, ?, ?)' - ); - const insertMinhash = db.prepare( - 'INSERT OR REPLACE INTO minhash_signatures (mode, doc_id, sig) VALUES (?, ?, ?)' - ); - const insertDense = db.prepare( - 'INSERT OR REPLACE INTO dense_vectors (mode, doc_id, vector) VALUES (?, ?, ?)' - ); - const insertDenseMeta = 
db.prepare( - 'INSERT OR REPLACE INTO dense_meta (mode, dims, scale, model) VALUES (?, ?, ?, ?)' - ); - const insertFileManifest = db.prepare( - 'INSERT OR REPLACE INTO file_manifest (mode, file, hash, mtimeMs, size, chunk_count) VALUES (?, ?, ?, ?, ?, ?)' - ); - - /** - * Ingest token postings into SQLite. - * @param {object} tokenIndex - * @param {'code'|'prose'} targetMode - */ - function ingestTokenIndex(tokenIndex, targetMode) { - if (!tokenIndex?.vocab || !tokenIndex?.postings) return; - const vocab = tokenIndex.vocab; - const postings = tokenIndex.postings; - const docLengths = Array.isArray(tokenIndex.docLengths) ? tokenIndex.docLengths : []; - const avgDocLen = typeof tokenIndex.avgDocLen === 'number' ? tokenIndex.avgDocLen : null; - const totalDocs = typeof tokenIndex.totalDocs === 'number' ? tokenIndex.totalDocs : docLengths.length; - - const insertVocabTx = db.transaction(() => { - for (let i = 0; i < vocab.length; i++) { - insertTokenVocab.run(targetMode, i, vocab[i]); - } - }); - insertVocabTx(); - - const insertPostingsTx = db.transaction(() => { - for (let tokenId = 0; tokenId < postings.length; tokenId++) { - const posting = postings[tokenId] || []; - for (const entry of posting) { - if (!entry) continue; - const docId = entry[0]; - const tf = entry[1]; - insertTokenPosting.run(targetMode, tokenId, docId, tf); - } - } - }); - insertPostingsTx(); - - const insertLengthsTx = db.transaction(() => { - for (let docId = 0; docId < docLengths.length; docId++) { - insertDocLength.run(targetMode, docId, docLengths[docId]); - } - }); - insertLengthsTx(); - - insertTokenStats.run(targetMode, avgDocLen, totalDocs); - } - - /** - * Ingest a generic postings index (phrase/chargram). - * @param {object} indexData - * @param {'code'|'prose'} targetMode - * @param {import('better-sqlite3').Statement} insertVocabStmt - * @param {import('better-sqlite3').Statement} insertPostingStmt - */ - function ingestPostingIndex(indexData, targetMode, insertVocabStmt, insertPostingStmt) { - if (!indexData?.vocab || !indexData?.postings) return; - const vocab = indexData.vocab; - const postings = indexData.postings; - - const insertVocabTx = db.transaction(() => { - for (let i = 0; i < vocab.length; i++) { - insertVocabStmt.run(targetMode, i, vocab[i]); - } - }); - insertVocabTx(); - - const insertPostingsTx = db.transaction(() => { - for (let tokenId = 0; tokenId < postings.length; tokenId++) { - const posting = postings[tokenId] || []; - for (const docId of posting) { - insertPostingStmt.run(targetMode, tokenId, docId); - } - } - }); - insertPostingsTx(); - } - - /** - * Ingest minhash signatures into SQLite. - * @param {object} minhash - * @param {'code'|'prose'} targetMode - */ - function ingestMinhash(minhash, targetMode) { - if (!minhash?.signatures || !minhash.signatures.length) return; - const insertTx = db.transaction(() => { - for (let docId = 0; docId < minhash.signatures.length; docId++) { - const sig = minhash.signatures[docId]; - if (!sig) continue; - insertMinhash.run(targetMode, docId, packUint32(sig)); - } - }); - insertTx(); - } - - /** - * Ingest dense vectors into SQLite. - * @param {object} dense - * @param {'code'|'prose'} targetMode - */ - function ingestDense(dense, targetMode) { - if (!dense?.vectors || !dense.vectors.length) return; - insertDenseMeta.run( - targetMode, - dense.dims || null, - typeof dense.scale === 'number' ? 
dense.scale : 1.0, - dense.model || modelConfig.id || null - ); - const insertTx = db.transaction(() => { - for (let docId = 0; docId < dense.vectors.length; docId++) { - const vec = dense.vectors[docId]; - if (!vec) continue; - insertDense.run(targetMode, docId, packUint8(vec)); - if (vectorAnn?.insert) { - const floatVec = dequantizeUint8ToFloat32(vec); - const encoded = encodeVector(floatVec, vectorExtension); - if (encoded) vectorAnn.insert.run(toVectorId(docId), encoded); - } - } - }); - insertTx(); - } - - /** - * Ingest all index components for a mode. - * @param {object} indexData - * @param {'code'|'prose'} targetMode - */ - function ingestIndex(indexData, targetMode) { - if (!indexData) return 0; - const { chunkMeta } = indexData; - let count = 0; - - const insert = db.transaction((rows) => { - for (const row of rows) { - insertChunk.run(row); - insertFts.run(row); - } - }); - - const rows = []; - for (const chunk of chunkMeta) { - const id = chunk.id; - const tokensArray = Array.isArray(chunk.tokens) ? chunk.tokens : []; - const tokensText = tokensArray.join(' '); - rows.push({ - id, - mode: targetMode, - file: normalizeFilePath(chunk.file), - start: chunk.start, - end: chunk.end, - startLine: chunk.startLine || null, - endLine: chunk.endLine || null, - ext: chunk.ext || null, - kind: chunk.kind || null, - name: chunk.name || null, - headline: chunk.headline || null, - preContext: chunk.preContext ? JSON.stringify(chunk.preContext) : null, - postContext: chunk.postContext ? JSON.stringify(chunk.postContext) : null, - weight: typeof chunk.weight === 'number' ? chunk.weight : 1, - tokens: tokensArray.length ? JSON.stringify(tokensArray) : null, - tokensText, - ngrams: chunk.ngrams ? JSON.stringify(chunk.ngrams) : null, - codeRelations: chunk.codeRelations ? JSON.stringify(chunk.codeRelations) : null, - docmeta: chunk.docmeta ? JSON.stringify(chunk.docmeta) : null, - stats: chunk.stats ? JSON.stringify(chunk.stats) : null, - complexity: chunk.complexity ? JSON.stringify(chunk.complexity) : null, - lint: chunk.lint ? JSON.stringify(chunk.lint) : null, - externalDocs: chunk.externalDocs ? JSON.stringify(chunk.externalDocs) : null, - last_modified: chunk.last_modified || null, - last_author: chunk.last_author || null, - churn: typeof chunk.churn === 'number' ? chunk.churn : null, - chunk_authors: chunk.chunk_authors ? JSON.stringify(chunk.chunk_authors) : null - }); - count++; - } - - insert(rows); - ingestTokenIndex(indexData.tokenPostings, targetMode); - ingestPostingIndex(indexData.phraseNgrams, targetMode, insertPhraseVocab, insertPhrasePosting); - ingestPostingIndex(indexData.chargrams, targetMode, insertChargramVocab, insertChargramPosting); - ingestMinhash(indexData.minhash, targetMode); - ingestDense(indexData.denseVec, targetMode); - - return count; - } - - /** - * Ingest file manifest metadata if available. - * @param {object} indexData - * @param {'code'|'prose'} targetMode - */ - function ingestFileManifest(indexData, targetMode) { - if (!indexData?.chunkMeta) return; - const fileCounts = new Map(); - for (const chunk of indexData.chunkMeta) { - if (!chunk?.file) continue; - const normalizedFile = normalizeFilePath(chunk.file); - fileCounts.set(normalizedFile, (fileCounts.get(normalizedFile) || 0) + 1); - } - const insertTx = db.transaction(() => { - for (const [file, count] of fileCounts.entries()) { - const entry = manifestFiles && manifestFiles[file] ? 
manifestFiles[file] : null; - insertFileManifest.run( - targetMode, - file, - entry?.hash || null, - Number.isFinite(entry?.mtimeMs) ? entry.mtimeMs : null, - Number.isFinite(entry?.size) ? entry.size : null, - count - ); - } - }); - insertTx(); - } - - const count = ingestIndex(index, mode); - ingestFileManifest(index, mode); - db.close(); - return count; -} - -/** - * Read the SQLite schema version. - * @param {import('better-sqlite3').Database} db - * @returns {number|null} - */ -function getSchemaVersion(db) { - try { - const value = db.pragma('user_version', { simple: true }); - return Number.isFinite(value) ? value : null; - } catch { - return null; - } -} - -/** - * Load file manifest entries from SQLite. - * @param {import('better-sqlite3').Database} db - * @param {'code'|'prose'} mode - * @returns {object} - */ -function getFileManifest(db, mode) { - const rows = db.prepare('SELECT file, hash, mtimeMs, size FROM file_manifest WHERE mode = ?').all(mode); - const map = new Map(); - for (const row of rows) { - map.set(row.file, row); - } - return map; -} - -/** - * Check if a manifest entry matches the DB entry. - * @param {object} entry - * @param {object} dbEntry - * @returns {boolean} - */ -function isManifestMatch(entry, dbEntry) { - if (!dbEntry) return false; - if (entry?.hash && dbEntry.hash) return entry.hash === dbEntry.hash; - const mtimeMatch = Number.isFinite(entry?.mtimeMs) && Number.isFinite(dbEntry.mtimeMs) - ? entry.mtimeMs === dbEntry.mtimeMs - : false; - const sizeMatch = Number.isFinite(entry?.size) && Number.isFinite(dbEntry.size) - ? entry.size === dbEntry.size - : false; - return mtimeMatch && sizeMatch; -} +export { runBuildSqliteIndex }; -/** - * Diff file manifests into added/changed/deleted sets. - * @param {object} manifestFiles - * @param {object} dbFiles - * @returns {{added:string[],changed:string[],deleted:string[]}} - */ -function diffFileManifests(manifestFiles, dbFiles) { - const changed = []; - const deleted = []; - const manifestKeys = Object.keys(manifestFiles || {}); - const manifestSet = new Set(manifestKeys); - - for (const file of manifestKeys) { - const entry = manifestFiles[file]; - const dbEntry = dbFiles.get(file); - if (!isManifestMatch(entry, dbEntry)) { - changed.push(file); - } - } - - for (const [file] of dbFiles.entries()) { - if (!manifestSet.has(file)) deleted.push(file); - } - - return { changed, deleted }; -} - -/** - * Fetch vocab rows by value for a given mode/table. - * @param {import('better-sqlite3').Database} db - * @param {'code'|'prose'} mode - * @param {string} table - * @param {string} idColumn - * @param {string} valueColumn - * @param {string[]} values - * @returns {Array<{id:number,value:string}>} - */ -function fetchVocabRows(db, mode, table, idColumn, valueColumn, values) { - const unique = Array.from(new Set(values.filter(Boolean))); - if (!unique.length) return []; - const rows = []; - for (const chunk of chunkArray(unique)) { - const placeholders = chunk.map(() => '?').join(','); - const stmt = db.prepare( - `SELECT ${idColumn} AS id, ${valueColumn} AS value FROM ${table} WHERE mode = ? AND ${valueColumn} IN (${placeholders})` - ); - rows.push(...stmt.all(mode, ...chunk)); - } - return rows; -} - -/** - * Ensure vocab ids exist for a list of values. 
- * @param {import('better-sqlite3').Database} db - * @param {'code'|'prose'} mode - * @param {string} table - * @param {string} idColumn - * @param {string} valueColumn - * @param {string[]} values - * @param {import('better-sqlite3').Statement} insertStmt - * @returns {Map} - */ -function ensureVocabIds(db, mode, table, idColumn, valueColumn, values, insertStmt) { - const unique = Array.from(new Set(values.filter(Boolean))); - if (!unique.length) return new Map(); - const existing = fetchVocabRows(db, mode, table, idColumn, valueColumn, unique); - const map = new Map(existing.map((row) => [row.value, row.id])); - const missing = unique.filter((value) => !map.has(value)); - if (!missing.length) return map; - - missing.sort(); - const maxRow = db.prepare(`SELECT MAX(${idColumn}) AS maxId FROM ${table} WHERE mode = ?`).get(mode); - let nextId = Number.isFinite(maxRow?.maxId) ? maxRow.maxId + 1 : 0; - const insertTx = db.transaction(() => { - for (const value of missing) { - insertStmt.run(mode, nextId, value); - map.set(value, nextId); - nextId += 1; - } - }); - insertTx(); - - return map; -} - -/** - * Delete doc ids from all tables for a mode. - * @param {import('better-sqlite3').Database} db - * @param {'code'|'prose'} mode - * @param {number[]} docIds - * @param {Array<{table:string,column:string,withMode:boolean,transform?:(value:any)=>any}>} [extraTables] - */ -function deleteDocIds(db, mode, docIds, extraTables = []) { - if (!docIds.length) return; - const deleteTargets = [ - { table: 'chunks', column: 'id' }, - { table: 'chunks_fts', column: 'rowid' }, - { table: 'token_postings', column: 'doc_id' }, - { table: 'phrase_postings', column: 'doc_id' }, - { table: 'chargram_postings', column: 'doc_id' }, - { table: 'minhash_signatures', column: 'doc_id' }, - { table: 'dense_vectors', column: 'doc_id' }, - { table: 'doc_lengths', column: 'doc_id' } - ]; - for (const extra of extraTables) { - if (extra?.table && extra?.column) deleteTargets.push(extra); - } - for (const chunk of chunkArray(docIds)) { - const placeholders = chunk.map(() => '?').join(','); - for (const target of deleteTargets) { - const withMode = target.withMode !== false; - const values = target.transform ? chunk.map(target.transform) : chunk; - const where = withMode - ? `mode = ? AND ${target.column} IN (${placeholders})` - : `${target.column} IN (${placeholders})`; - const stmt = db.prepare( - `DELETE FROM ${target.table} WHERE ${where}` - ); - if (withMode) { - stmt.run(mode, ...values); - } else { - stmt.run(...values); - } - } - } -} - -/** - * Recompute and update token stats for a mode. - * @param {import('better-sqlite3').Database} db - * @param {'code'|'prose'} mode - * @param {import('better-sqlite3').Statement} insertTokenStats - */ -function updateTokenStats(db, mode, insertTokenStats) { - const row = db.prepare( - 'SELECT COUNT(*) AS total_docs, AVG(len) AS avg_doc_len FROM doc_lengths WHERE mode = ?' - ).get(mode) || {}; - insertTokenStats.run( - mode, - typeof row.avg_doc_len === 'number' ? row.avg_doc_len : 0, - typeof row.total_docs === 'number' ? row.total_docs : 0 - ); -} - -/** - * Apply incremental updates to a SQLite index using cached bundles. 
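Before any rows change, the incremental path diffs the cached manifest against what the database last recorded. A standalone sketch of that comparison, mirroring the isManifestMatch/diffFileManifests logic defined further down (the names here are illustrative):

// An entry is unchanged when hashes agree, or failing that, when both
// mtime and size agree; anything else is treated as changed.
const entryMatches = (entry, dbEntry) => {
  if (!dbEntry) return false;
  if (entry?.hash && dbEntry.hash) return entry.hash === dbEntry.hash;
  return Number.isFinite(entry?.mtimeMs) && entry.mtimeMs === dbEntry.mtimeMs
    && Number.isFinite(entry?.size) && entry.size === dbEntry.size;
};

const diffManifests = (manifestFiles, dbFiles /* Map<string, entry> */) => ({
  changed: Object.keys(manifestFiles).filter(
    (file) => !entryMatches(manifestFiles[file], dbFiles.get(file))
  ),
  deleted: [...dbFiles.keys()].filter((file) => !(file in manifestFiles))
});

// diffManifests({ 'a.js': { hash: 'h1' } },
//   new Map([['a.js', { hash: 'h1' }], ['b.js', { hash: 'h2' }]]))
// -> { changed: [], deleted: ['b.js'] }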
- * @param {string} outPath - * @param {'code'|'prose'} mode - * @param {object|null} incrementalData - * @param {{expectedDense?:{model?:string|null,dims?:number|null}}} [options] - * @returns {{used:boolean,reason?:string,changedFiles?:number,deletedFiles?:number,insertedChunks?:number}} - */ -function incrementalUpdateDatabase(outPath, mode, incrementalData, options = {}) { - if (!incrementalData?.manifest) { - return { used: false, reason: 'missing incremental manifest' }; - } - if (!fsSync.existsSync(outPath)) { - return { used: false, reason: 'sqlite db missing' }; - } - - const expectedDense = options.expectedDense || null; - const expectedModel = expectedDense?.model || modelConfig.id || null; - const expectedDims = Number.isFinite(expectedDense?.dims) ? expectedDense.dims : null; - - const db = new Database(outPath); - try { - db.pragma('journal_mode = WAL'); - db.pragma('synchronous = NORMAL'); - } catch {} - - const schemaVersion = getSchemaVersion(db); - if (schemaVersion !== SCHEMA_VERSION) { - db.close(); - return { - used: false, - reason: `schema mismatch (db=${schemaVersion ?? 'unknown'}, expected=${SCHEMA_VERSION})` - }; - } - - if (!hasRequiredTables(db, REQUIRED_TABLES)) { - db.close(); - return { used: false, reason: 'schema missing' }; - } - - const dbDenseMeta = db.prepare( - 'SELECT dims, scale, model FROM dense_meta WHERE mode = ?' - ).get(mode); - const dbDims = Number.isFinite(dbDenseMeta?.dims) ? dbDenseMeta.dims : null; - const dbModel = dbDenseMeta?.model || null; - if ((expectedModel || expectedDims !== null) && !dbDenseMeta) { - db.close(); - return { used: false, reason: 'dense metadata missing' }; - } - if (expectedModel) { - if (!dbModel) { - db.close(); - return { used: false, reason: 'dense metadata model missing' }; - } - if (dbModel !== expectedModel) { - db.close(); - return { used: false, reason: `model mismatch (db=${dbModel}, expected=${expectedModel})` }; - } - } - if (expectedDims !== null) { - if (dbDims === null) { - db.close(); - return { used: false, reason: 'dense metadata dims missing' }; - } - if (dbDims !== expectedDims) { - db.close(); - return { used: false, reason: `dense dims mismatch (db=${dbDims}, expected=${expectedDims})` }; - } - } - - const manifestFiles = incrementalData.manifest.files || {}; - const dbFiles = getFileManifest(db, mode); - const { changed, deleted } = diffFileManifests(manifestFiles, dbFiles); - if (!changed.length && !deleted.length) { - db.close(); - return { used: true, changedFiles: 0, deletedFiles: 0, insertedChunks: 0 }; - } - - const bundles = new Map(); - for (const file of changed) { - const entry = manifestFiles[file]; - const bundleName = entry?.bundle; - if (!bundleName) { - db.close(); - return { used: false, reason: `missing bundle for ${file}` }; - } - const bundlePath = path.join(incrementalData.bundleDir, bundleName); - if (!fsSync.existsSync(bundlePath)) { - db.close(); - return { used: false, reason: `bundle missing for ${file}` }; - } - const bundle = readJson(bundlePath); - if (!bundle || !Array.isArray(bundle.chunks)) { - db.close(); - return { used: false, reason: `invalid bundle for ${file}` }; - } - bundles.set(file, bundle); - } - - const tokenValues = []; - const phraseValues = []; - const chargramValues = []; - const incomingDimsSet = new Set(); - for (const bundle of bundles.values()) { - for (const chunk of bundle.chunks || []) { - const tokensArray = Array.isArray(chunk.tokens) ? 
chunk.tokens : []; - if (tokensArray.length) tokenValues.push(...tokensArray); - if (Array.isArray(chunk.ngrams)) phraseValues.push(...chunk.ngrams); - if (Array.isArray(chunk.chargrams)) chargramValues.push(...chunk.chargrams); - if (Array.isArray(chunk.embedding) && chunk.embedding.length) { - incomingDimsSet.add(chunk.embedding.length); - } - } - } - if (incomingDimsSet.size > 1) { - db.close(); - return { used: false, reason: 'embedding dims mismatch across bundles' }; - } - const incomingDims = incomingDimsSet.size ? [...incomingDimsSet][0] : null; - if (incomingDims !== null && dbDims !== null && incomingDims !== dbDims) { - db.close(); - return { used: false, reason: `embedding dims mismatch (db=${dbDims}, incoming=${incomingDims})` }; - } - if (incomingDims !== null && expectedDims !== null && incomingDims !== expectedDims) { - db.close(); - return { used: false, reason: `embedding dims mismatch (expected=${expectedDims}, incoming=${incomingDims})` }; - } - - const insertChunk = db.prepare(` - INSERT OR REPLACE INTO chunks ( - id, mode, file, start, end, startLine, endLine, ext, kind, name, headline, - preContext, postContext, weight, tokens, ngrams, codeRelations, docmeta, - stats, complexity, lint, externalDocs, last_modified, last_author, churn, - chunk_authors - ) VALUES ( - @id, @mode, @file, @start, @end, @startLine, @endLine, @ext, @kind, @name, @headline, - @preContext, @postContext, @weight, @tokens, @ngrams, @codeRelations, @docmeta, - @stats, @complexity, @lint, @externalDocs, @last_modified, @last_author, @churn, - @chunk_authors - ); - `); - - const insertFts = db.prepare(` - INSERT OR REPLACE INTO chunks_fts (rowid, mode, file, name, kind, headline, tokens) - VALUES (@id, @mode, @file, @name, @kind, @headline, @tokensText); - `); - - const insertTokenVocab = db.prepare( - 'INSERT OR REPLACE INTO token_vocab (mode, token_id, token) VALUES (?, ?, ?)' - ); - const insertTokenPosting = db.prepare( - 'INSERT OR REPLACE INTO token_postings (mode, token_id, doc_id, tf) VALUES (?, ?, ?, ?)' - ); - const insertDocLength = db.prepare( - 'INSERT OR REPLACE INTO doc_lengths (mode, doc_id, len) VALUES (?, ?, ?)' - ); - const insertTokenStats = db.prepare( - 'INSERT OR REPLACE INTO token_stats (mode, avg_doc_len, total_docs) VALUES (?, ?, ?)' - ); - const insertPhraseVocab = db.prepare( - 'INSERT OR REPLACE INTO phrase_vocab (mode, phrase_id, ngram) VALUES (?, ?, ?)' - ); - const insertPhrasePosting = db.prepare( - 'INSERT OR REPLACE INTO phrase_postings (mode, phrase_id, doc_id) VALUES (?, ?, ?)' - ); - const insertChargramVocab = db.prepare( - 'INSERT OR REPLACE INTO chargram_vocab (mode, gram_id, gram) VALUES (?, ?, ?)' - ); - const insertChargramPosting = db.prepare( - 'INSERT OR REPLACE INTO chargram_postings (mode, gram_id, doc_id) VALUES (?, ?, ?)' - ); - const insertMinhash = db.prepare( - 'INSERT OR REPLACE INTO minhash_signatures (mode, doc_id, sig) VALUES (?, ?, ?)' - ); - const insertDense = db.prepare( - 'INSERT OR REPLACE INTO dense_vectors (mode, doc_id, vector) VALUES (?, ?, ?)' - ); - const insertDenseMeta = db.prepare( - 'INSERT OR REPLACE INTO dense_meta (mode, dims, scale, model) VALUES (?, ?, ?, ?)' - ); - const insertFileManifest = db.prepare( - 'INSERT OR REPLACE INTO file_manifest (mode, file, hash, mtimeMs, size, chunk_count) VALUES (?, ?, ?, ?, ?, ?)' - ); - - const tokenIdMap = ensureVocabIds(db, mode, 'token_vocab', 'token_id', 'token', tokenValues, insertTokenVocab); - const phraseIdMap = ensureVocabIds(db, mode, 'phrase_vocab', 'phrase_id', 'ngram', 
phraseValues, insertPhraseVocab); - const chargramIdMap = ensureVocabIds(db, mode, 'chargram_vocab', 'gram_id', 'gram', chargramValues, insertChargramVocab); - - const maxRow = db.prepare('SELECT MAX(id) AS maxId FROM chunks WHERE mode = ?').get(mode); - let nextDocId = Number.isFinite(maxRow?.maxId) ? maxRow.maxId + 1 : 0; - const denseMetaRow = dbDenseMeta; - let denseMetaSet = !!denseMetaRow; - let denseDims = typeof denseMetaRow?.dims === 'number' ? denseMetaRow.dims : null; - let denseWarned = false; - let insertedChunks = 0; - let vectorAnnLoaded = false; - let vectorAnnReady = false; - let vectorAnnTable = vectorExtension.table || 'dense_vectors_ann'; - let vectorAnnColumn = vectorExtension.column || 'embedding'; - let insertVectorAnn = null; - if (vectorAnnEnabled) { - const loadResult = loadVectorExtension(db, vectorExtension, `sqlite ${mode}`); - if (loadResult.ok) { - vectorAnnLoaded = true; - if (hasVectorTable(db, vectorAnnTable)) { - vectorAnnReady = true; - } else if (denseDims) { - const created = ensureVectorTable(db, vectorExtension, denseDims); - if (created.ok) { - vectorAnnReady = true; - vectorAnnTable = created.tableName; - vectorAnnColumn = created.column; - } else { - console.warn(`[sqlite] Failed to create vector table for ${mode}: ${created.reason}`); - } - } - if (vectorAnnReady) { - insertVectorAnn = db.prepare( - `INSERT OR REPLACE INTO ${vectorAnnTable} (rowid, ${vectorAnnColumn}) VALUES (?, ?)` - ); - } - } else { - console.warn(`[sqlite] Vector extension unavailable for ${mode}: ${loadResult.reason}`); - } - } - const vectorDeleteTargets = vectorAnnLoaded && vectorAnnReady - ? [{ table: vectorAnnTable, column: 'rowid', withMode: false, transform: toVectorId }] - : []; - - const applyChanges = db.transaction(() => { - for (const file of deleted) { - const normalizedFile = normalizeFilePath(file); - const docRows = db.prepare('SELECT id FROM chunks WHERE mode = ? AND file = ?').all(mode, normalizedFile); - const docIds = docRows.map((row) => row.id); - deleteDocIds(db, mode, docIds, vectorDeleteTargets); - db.prepare('DELETE FROM file_manifest WHERE mode = ? AND file = ?').run(mode, normalizedFile); - } - - for (const file of changed) { - const normalizedFile = normalizeFilePath(file); - const docRows = db.prepare('SELECT id FROM chunks WHERE mode = ? AND file = ?').all(mode, normalizedFile); - const docIds = docRows.map((row) => row.id); - deleteDocIds(db, mode, docIds, vectorDeleteTargets); - - const bundle = bundles.get(file); - let chunkCount = 0; - for (const chunk of bundle.chunks || []) { - const docId = nextDocId; - nextDocId += 1; - const row = buildChunkRow(chunk, mode, docId); - insertChunk.run(row); - insertFts.run(row); - - const tokensArray = Array.isArray(chunk.tokens) ? 
chunk.tokens : []; - insertDocLength.run(mode, docId, tokensArray.length); - const freq = buildTokenFrequency(tokensArray); - for (const [token, tf] of freq.entries()) { - const tokenId = tokenIdMap.get(token); - if (tokenId === undefined) continue; - insertTokenPosting.run(mode, tokenId, docId, tf); - } - - if (Array.isArray(chunk.ngrams)) { - const unique = new Set(chunk.ngrams); - for (const ng of unique) { - const phraseId = phraseIdMap.get(ng); - if (phraseId === undefined) continue; - insertPhrasePosting.run(mode, phraseId, docId); - } - } - - if (Array.isArray(chunk.chargrams)) { - const unique = new Set(chunk.chargrams); - for (const gram of unique) { - const gramId = chargramIdMap.get(gram); - if (gramId === undefined) continue; - insertChargramPosting.run(mode, gramId, docId); - } - } - - if (Array.isArray(chunk.minhashSig) && chunk.minhashSig.length) { - insertMinhash.run(mode, docId, packUint32(chunk.minhashSig)); - } - - if (Array.isArray(chunk.embedding) && chunk.embedding.length) { - const dims = chunk.embedding.length; - if (!denseMetaSet) { - insertDenseMeta.run(mode, dims, 1.0, modelConfig.id || null); - denseMetaSet = true; - denseDims = dims; - } else if (denseDims !== null && dims !== denseDims && !denseWarned) { - console.warn(`Dense vector dims mismatch for ${mode}: expected ${denseDims}, got ${dims}`); - denseWarned = true; - } - insertDense.run(mode, docId, packUint8(quantizeVec(chunk.embedding))); - if (vectorAnnLoaded) { - if (!vectorAnnReady) { - const created = ensureVectorTable(db, vectorExtension, dims); - if (created.ok) { - vectorAnnReady = true; - vectorAnnTable = created.tableName; - vectorAnnColumn = created.column; - insertVectorAnn = db.prepare( - `INSERT OR REPLACE INTO ${vectorAnnTable} (rowid, ${vectorAnnColumn}) VALUES (?, ?)` - ); - } - } - if (vectorAnnReady && insertVectorAnn) { - const encoded = encodeVector(chunk.embedding, vectorExtension); - if (encoded) insertVectorAnn.run(toVectorId(docId), encoded); - } - } - } - - chunkCount += 1; - insertedChunks += 1; - } - - const entry = manifestFiles[file] || {}; - insertFileManifest.run( - mode, - normalizedFile, - entry?.hash || null, - Number.isFinite(entry?.mtimeMs) ? entry.mtimeMs : null, - Number.isFinite(entry?.size) ? entry.size : null, - chunkCount - ); - } - - updateTokenStats(db, mode, insertTokenStats); +if (process.argv[1] === fileURLToPath(import.meta.url)) { + runBuildSqliteIndex().catch((err) => { + console.error(err?.message || err); + process.exit(1); }); - - applyChanges(); - db.close(); - return { - used: true, - changedFiles: changed.length, - deletedFiles: deleted.length, - insertedChunks - }; -} - -/** - * Build or incrementally update an index for a mode. - * @param {'code'|'prose'} mode - * @param {object|null} index - * @param {string} targetPath - * @param {object|null} incrementalData - * @returns {{count?:number,incremental:boolean,changedFiles?:number,deletedFiles?:number,insertedChunks?:number}} - */ -async function runMode(mode, index, targetPath, incrementalData) { - if (incrementalRequested) { - const expectedDense = index?.denseVec - ? 
{ model: index.denseVec.model, dims: index.denseVec.dims } - : null; - const result = incrementalUpdateDatabase(targetPath, mode, incrementalData, { - expectedDense - }); - if (result.used) { - if (compactOnIncremental && (result.changedFiles || result.deletedFiles)) { - console.log(`[sqlite] Compaction requested for ${mode} index...`); - await compactDatabase({ - dbPath: targetPath, - mode, - vectorExtension, - dryRun: false, - keepBackup: false - }); - } - return { ...result, incremental: true }; - } - if (result.reason) { - console.warn(`[sqlite] Incremental ${mode} update skipped (${result.reason}); rebuilding full index.`); - } - } - const count = buildDatabase(targetPath, index, mode, incrementalData?.manifest?.files); - return { count, incremental: false, changedFiles: null, deletedFiles: null, insertedChunks: count }; -} - -const results = {}; -if (modeArg === 'all' || modeArg === 'code') { - const targetPath = modeArg === 'all' ? codeOutPath : outPath; - results.code = await runMode('code', codeIndex, targetPath, incrementalCode); -} -if (modeArg === 'all' || modeArg === 'prose') { - const targetPath = modeArg === 'all' ? proseOutPath : outPath; - results.prose = await runMode('prose', proseIndex, targetPath, incrementalProse); -} - -if (modeArg === 'all') { - const codeResult = results.code || {}; - const proseResult = results.prose || {}; - if (codeResult.incremental || proseResult.incremental) { - console.log(`SQLite indexes updated at code=${codeOutPath} prose=${proseOutPath}. code+${codeResult.insertedChunks || 0} prose+${proseResult.insertedChunks || 0}`); - } else { - console.log(`SQLite indexes built at code=${codeOutPath} prose=${proseOutPath}. code=${codeResult.count || 0} prose=${proseResult.count || 0}`); - } -} else { - const result = modeArg === 'code' ? results.code : results.prose; - if (result?.incremental) { - console.log(`SQLite ${modeArg} index updated at ${outPath}. +${result.insertedChunks || 0} chunks`); - } else { - console.log(`SQLite ${modeArg} index built at ${outPath}. 
${modeArg}=${result?.count || 0}`); - } } diff --git a/tools/build-sqlite-index/cli.js b/tools/build-sqlite-index/cli.js new file mode 100644 index 000000000..3bb8ac7ef --- /dev/null +++ b/tools/build-sqlite-index/cli.js @@ -0,0 +1,40 @@ +import { createCli } from '../../src/shared/cli.js'; + +export const normalizeValidateMode = (value) => { + if (value === false || value == null) return 'off'; + const normalized = String(value).trim().toLowerCase(); + if (!normalized || normalized === 'true') return 'smoke'; + if (['off', 'false', '0', 'no'].includes(normalized)) return 'off'; + if (['full', 'integrity'].includes(normalized)) return 'full'; + return 'smoke'; +}; + +export const parseBuildSqliteArgs = (rawArgs, options = {}) => { + const emitOutput = options.emitOutput !== false; + const exitOnError = options.exitOnError !== false; + const argv = createCli({ + scriptName: 'build-sqlite-index', + argv: ['node', 'build-sqlite-index.js', ...(rawArgs || [])], + options: { + 'code-dir': { type: 'string' }, + 'prose-dir': { type: 'string' }, + out: { type: 'string' }, + mode: { type: 'string', default: 'all' }, + repo: { type: 'string' }, + incremental: { type: 'boolean', default: false }, + compact: { type: 'boolean', default: false }, + validate: { type: 'string', default: 'smoke' }, + 'index-root': { type: 'string' } + } + }).parse(); + const validateMode = normalizeValidateMode(argv.validate); + const modeArg = (argv.mode || 'all').toLowerCase(); + return { + argv, + emitOutput, + exitOnError, + validateMode, + modeArg, + rawArgs: rawArgs || [] + }; +}; diff --git a/tools/build-sqlite-index/index-state.js b/tools/build-sqlite-index/index-state.js new file mode 100644 index 000000000..b483b4e81 --- /dev/null +++ b/tools/build-sqlite-index/index-state.js @@ -0,0 +1,78 @@ +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import path from 'node:path'; +import { readJson } from '../../src/storage/sqlite/utils.js'; +import { writeJsonObjectFile } from '../../src/shared/json-stream.js'; +import { checksumFile } from '../../src/shared/hash.js'; + +export const updateIndexStateManifest = async (indexDir) => { + const manifestPath = path.join(indexDir, 'pieces', 'manifest.json'); + if (!fsSync.existsSync(manifestPath)) return; + let manifest = null; + try { + manifest = readJson(manifestPath) || null; + } catch { + return; + } + if (!manifest || !Array.isArray(manifest.pieces)) return; + const statePath = path.join(indexDir, 'index_state.json'); + if (!fsSync.existsSync(statePath)) return; + let bytes = null; + let checksum = null; + let checksumAlgo = null; + try { + const stat = await fs.stat(statePath); + bytes = stat.size; + const result = await checksumFile(statePath); + checksum = result?.value || null; + checksumAlgo = result?.algo || null; + } catch {} + if (!bytes || !checksum) return; + const pieces = manifest.pieces.map((piece) => { + if (piece?.name !== 'index_state' || piece?.path !== 'index_state.json') { + return piece; + } + return { + ...piece, + bytes, + checksum: checksum && checksumAlgo ? `${checksumAlgo}:${checksum}` : piece.checksum + }; + }); + const next = { + ...manifest, + updatedAt: new Date().toISOString(), + pieces + }; + try { + await writeJsonObjectFile(manifestPath, { fields: next, atomic: true }); + } catch { + // Ignore manifest write failures. 
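The helpers in this new module give SQLite builds a two-phase readiness flag: state flips to pending before a build and back to ready only on success, and the pieces manifest is refreshed so the recorded checksum for index_state.json stays accurate. A hedged usage sketch of that lifecycle (withSqliteState is illustrative; the real call sites are in run.js below):

import { updateSqliteState } from './index-state.js';

const withSqliteState = async (indexDir, build) => {
  // Phase 1: mark the sqlite artifacts pending before touching them.
  await updateSqliteState(indexDir, { enabled: true, ready: false, pending: true });
  await build();
  // Phase 2: reached only on success; a thrown build leaves pending=true,
  // which readers can interpret as "keep using the previous artifacts".
  await updateSqliteState(indexDir, { enabled: true, ready: true, pending: false });
};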
+ } +}; + +export const updateSqliteState = async (indexDir, patch) => { + if (!indexDir) return; + const statePath = path.join(indexDir, 'index_state.json'); + let state = {}; + if (fsSync.existsSync(statePath)) { + try { + state = readJson(statePath) || {}; + } catch { + state = {}; + } + } + const now = new Date().toISOString(); + state.generatedAt = state.generatedAt || now; + state.updatedAt = now; + state.sqlite = { + ...(state.sqlite || {}), + ...patch, + updatedAt: now + }; + try { + await writeJsonObjectFile(statePath, { fields: state, atomic: true }); + } catch { + // Ignore index state write failures. + } + await updateIndexStateManifest(indexDir); +}; diff --git a/tools/build-sqlite-index/run.js b/tools/build-sqlite-index/run.js new file mode 100644 index 000000000..68f36da48 --- /dev/null +++ b/tools/build-sqlite-index/run.js @@ -0,0 +1,345 @@ +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { parseBuildSqliteArgs } from './cli.js'; +import { createTempPath } from './temp-path.js'; +import { updateSqliteState } from './index-state.js'; +import { getEnvConfig } from '../../src/shared/env.js'; +import { resolveThreadLimits } from '../../src/shared/threads.js'; +import { markBuildPhase, resolveBuildStatePath, startBuildHeartbeat } from '../../src/index/build/build-state.js'; +import { + getIndexDir, + getModelConfig, + getRepoCacheRoot, + loadUserConfig, + resolveIndexRoot, + resolveRepoRoot, + resolveSqlitePaths +} from '../dict-utils.js'; +import { + encodeVector, + ensureVectorTable, + getVectorExtensionConfig, + hasVectorTable, + loadVectorExtension +} from '../vector-extension.js'; +import { compactDatabase } from '../compact-sqlite-index.js'; +import { loadIncrementalManifest } from '../../src/storage/sqlite/incremental.js'; +import { loadIndex, replaceSqliteDatabase } from '../../src/storage/sqlite/utils.js'; +import { buildDatabaseFromArtifacts, loadIndexPieces } from '../../src/storage/sqlite/build/from-artifacts.js'; +import { buildDatabaseFromBundles } from '../../src/storage/sqlite/build/from-bundles.js'; +import { incrementalUpdateDatabase } from '../../src/storage/sqlite/build/incremental-update.js'; + +let Database = null; +try { + ({ default: Database } = await import('better-sqlite3')); +} catch {} + +const resolveOutputPaths = ({ modeArg, outArg, sqlitePaths }) => { + let outPath = null; + let codeOutPath = sqlitePaths.codePath; + let proseOutPath = sqlitePaths.prosePath; + if (outArg) { + if (modeArg === 'all') { + const outDir = outArg.endsWith('.db') ? path.dirname(outArg) : outArg; + codeOutPath = path.join(outDir, 'index-code.db'); + proseOutPath = path.join(outDir, 'index-prose.db'); + } else { + const targetName = modeArg === 'code' ? 'index-code.db' : 'index-prose.db'; + outPath = outArg.endsWith('.db') ? outArg : path.join(outArg, targetName); + } + } + if (!outPath && modeArg !== 'all') { + outPath = modeArg === 'code' ? 
codeOutPath : proseOutPath; + } + return { outPath, codeOutPath, proseOutPath }; +}; + +export async function runBuildSqliteIndex(rawArgs = process.argv.slice(2), options = {}) { + const { + argv, + emitOutput, + exitOnError, + validateMode, + modeArg, + rawArgs: parsedRawArgs + } = parseBuildSqliteArgs(rawArgs, options); + const bail = (message, code = 1) => { + if (emitOutput && message) console.error(message); + if (exitOnError) process.exit(code); + throw new Error(message || 'SQLite index build failed.'); + }; + if (!Database) return bail('better-sqlite3 is required. Run npm install first.'); + + const rootArg = options.root ? path.resolve(options.root) : (argv.repo ? path.resolve(argv.repo) : null); + const root = rootArg || resolveRepoRoot(process.cwd()); + const envConfig = getEnvConfig(); + const userConfig = loadUserConfig(root); + const indexRoot = argv['index-root'] + ? path.resolve(argv['index-root']) + : resolveIndexRoot(root, userConfig); + const buildStatePath = resolveBuildStatePath(indexRoot); + const hasBuildState = buildStatePath && fsSync.existsSync(buildStatePath); + const stopHeartbeat = hasBuildState ? startBuildHeartbeat(indexRoot, 'stage4') : () => {}; + const threadLimits = resolveThreadLimits({ + argv, + rawArgv: parsedRawArgs, + envConfig, + configConcurrency: userConfig?.indexing?.concurrency, + importConcurrencyConfig: userConfig?.indexing?.importConcurrency + }); + if (emitOutput && envConfig.verbose === true) { + console.log( + `[sqlite] Thread limits (${threadLimits.source}): ` + + `cpu=${threadLimits.cpuCount}, cap=${threadLimits.maxConcurrencyCap}, ` + + `files=${threadLimits.fileConcurrency}, imports=${threadLimits.importConcurrency}, ` + + `io=${threadLimits.ioConcurrency}, cpuWork=${threadLimits.cpuConcurrency}.` + ); + } + const modelConfig = getModelConfig(root, userConfig); + const vectorExtension = getVectorExtensionConfig(root, userConfig); + const vectorAnnEnabled = vectorExtension.enabled; + const vectorConfig = { + enabled: vectorAnnEnabled, + extension: vectorExtension, + encodeVector, + hasVectorTable, + loadVectorExtension, + ensureVectorTable + }; + const repoCacheRoot = getRepoCacheRoot(root, userConfig); + const compactFlag = argv.compact; + const compactOnIncremental = compactFlag === true + || (compactFlag !== false && userConfig?.sqlite?.compactOnIncremental === true); + const codeDir = argv['code-dir'] + ? path.resolve(argv['code-dir']) + : getIndexDir(root, 'code', userConfig, { indexRoot }); + const proseDir = argv['prose-dir'] + ? path.resolve(argv['prose-dir']) + : getIndexDir(root, 'prose', userConfig, { indexRoot }); + const sqlitePaths = resolveSqlitePaths(root, userConfig, indexRoot ? { indexRoot } : {}); + const incrementalRequested = argv.incremental === true; + + if (!['all', 'code', 'prose'].includes(modeArg)) { + return bail('Invalid mode. Use --mode all|code|prose'); + } + + const sqliteStateTargets = []; + if (modeArg === 'all' || modeArg === 'code') sqliteStateTargets.push(codeDir); + if (modeArg === 'all' || modeArg === 'prose') sqliteStateTargets.push(proseDir); + if (hasBuildState) { + await markBuildPhase(indexRoot, 'stage4', 'running'); + } + await Promise.all(sqliteStateTargets.map((dir) => updateSqliteState(dir, { + enabled: true, + ready: false, + pending: true + }))); + + const outArg = argv.out ? 
path.resolve(argv.out) : null; + const { outPath, codeOutPath, proseOutPath } = resolveOutputPaths({ + modeArg, + outArg, + sqlitePaths + }); + + if (modeArg === 'all') { + await fs.mkdir(path.dirname(codeOutPath), { recursive: true }); + await fs.mkdir(path.dirname(proseOutPath), { recursive: true }); + } else if (outPath) { + await fs.mkdir(path.dirname(outPath), { recursive: true }); + } + + const loadIndexSafe = (dir, label) => { + try { + const index = loadIndex(dir, modelConfig.id); + if (index) return { index, tooLarge: false, pieces: null }; + return { index: null, tooLarge: false, pieces: loadIndexPieces(dir, modelConfig.id) }; + } catch (err) { + if (err?.code === 'ERR_JSON_TOO_LARGE') { + console.warn(`[sqlite] ${label} chunk_meta too large; will use pieces if available.`); + return { index: null, tooLarge: true, pieces: loadIndexPieces(dir, modelConfig.id) }; + } + throw err; + } + }; + + const { index: codeIndex, pieces: codePieces } = loadIndexSafe(codeDir, 'code'); + const { index: proseIndex, pieces: prosePieces } = loadIndexSafe(proseDir, 'prose'); + const incrementalCode = loadIncrementalManifest(repoCacheRoot, 'code'); + const incrementalProse = loadIncrementalManifest(repoCacheRoot, 'prose'); + if (!codeIndex && !codePieces && !proseIndex && !prosePieces + && !incrementalCode?.manifest && !incrementalProse?.manifest) { + return bail('No index found. Build index-code/index-prose first.'); + } + + if (sqlitePaths.legacyExists) { + try { + await fs.rm(sqlitePaths.legacyPath, { force: true }); + console.warn(`Removed legacy SQLite index at ${sqlitePaths.legacyPath}`); + } catch (err) { + console.warn(`Failed to remove legacy SQLite index at ${sqlitePaths.legacyPath}: ${err?.message || err}`); + } + } + + if (modeArg === 'code' && !codeIndex && !codePieces && !incrementalCode?.manifest) { + return bail('Code index missing; build index-code first.'); + } + if (modeArg === 'prose' && !proseIndex && !prosePieces && !incrementalProse?.manifest) { + return bail('Prose index missing; build index-prose first.'); + } + + const workerPath = fileURLToPath(new URL('../workers/bundle-reader.js', import.meta.url)); + + const runMode = async (mode, index, indexDir, targetPath, incrementalData) => { + const hasBundles = incrementalData?.manifest?.files + ? Object.keys(incrementalData.manifest.files).length > 0 + : false; + + if (incrementalRequested) { + const expectedDense = index?.denseVec + ? 
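One detail worth calling out in loadIndexSafe above: the ERR_JSON_TOO_LARGE error code is treated as a routine signal to fall back to the piece-backed loader rather than as a failure. The shape of that pattern, reduced to a sketch with illustrative names:

const loadWithFallback = (loadWhole, loadPieces) => {
  try {
    return { index: loadWhole(), pieces: null };
  } catch (err) {
    if (err?.code === 'ERR_JSON_TOO_LARGE') {
      // Oversized chunk_meta: fall back to pieces instead of aborting.
      return { index: null, pieces: loadPieces() };
    }
    throw err; // anything else is a real failure
  }
};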
{ model: index.denseVec.model, dims: index.denseVec.dims } + : null; + const result = await incrementalUpdateDatabase({ + Database, + outPath: targetPath, + mode, + incrementalData, + modelConfig, + vectorConfig, + emitOutput, + validateMode, + expectedDense + }); + if (result.used) { + if (compactOnIncremental && (result.changedFiles || result.deletedFiles)) { + console.log(`[sqlite] Compaction requested for ${mode} index...`); + await compactDatabase({ + dbPath: targetPath, + mode, + vectorExtension, + dryRun: false, + keepBackup: false + }); + } + return { ...result, incremental: true }; + } + if (result.reason) { + console.warn(`[sqlite] Incremental ${mode} update skipped (${result.reason}); rebuilding full index.`); + } + } + if (hasBundles) { + console.log(`[sqlite] Using incremental bundles for ${mode} full rebuild.`); + const tempPath = createTempPath(targetPath); + let bundleResult = { count: 0 }; + try { + bundleResult = await buildDatabaseFromBundles({ + Database, + outPath: tempPath, + mode, + incrementalData, + envConfig, + threadLimits, + emitOutput, + validateMode, + vectorConfig, + modelConfig, + workerPath + }); + if (bundleResult.count) { + await replaceSqliteDatabase(tempPath, targetPath, { keepBackup: true }); + } else { + await fs.rm(tempPath, { force: true }); + } + } catch (err) { + try { await fs.rm(tempPath, { force: true }); } catch {} + throw err; + } + if (bundleResult.count) { + return { + count: bundleResult.count, + incremental: false, + changedFiles: null, + deletedFiles: null, + insertedChunks: bundleResult.count + }; + } + if (bundleResult.reason) { + console.warn(`[sqlite] Bundle build skipped (${bundleResult.reason}); falling back to file-backed artifacts.`); + } + } + const tempPath = createTempPath(targetPath); + let count = 0; + try { + count = await buildDatabaseFromArtifacts({ + Database, + outPath: tempPath, + index, + indexDir, + mode, + manifestFiles: incrementalData?.manifest?.files, + emitOutput, + validateMode, + vectorConfig, + modelConfig + }); + await replaceSqliteDatabase(tempPath, targetPath, { keepBackup: true }); + } catch (err) { + try { await fs.rm(tempPath, { force: true }); } catch {} + throw err; + } + return { count, incremental: false, changedFiles: null, deletedFiles: null, insertedChunks: count }; + }; + + const results = {}; + if (modeArg === 'all' || modeArg === 'code') { + const targetPath = modeArg === 'all' ? codeOutPath : outPath; + const codeInput = codeIndex || codePieces; + results.code = await runMode('code', codeInput, codeDir, targetPath, incrementalCode); + } + if (modeArg === 'all' || modeArg === 'prose') { + const targetPath = modeArg === 'all' ? proseOutPath : outPath; + const proseInput = proseIndex || prosePieces; + results.prose = await runMode('prose', proseInput, proseDir, targetPath, incrementalProse); + } + + if (modeArg === 'all') { + const codeResult = results.code || {}; + const proseResult = results.prose || {}; + if (codeResult.incremental || proseResult.incremental) { + console.log(`SQLite indexes updated at code=${codeOutPath} prose=${proseOutPath}. ` + + `code+${codeResult.insertedChunks || 0} prose+${proseResult.insertedChunks || 0}`); + } else { + console.log(`SQLite indexes built at code=${codeOutPath} prose=${proseOutPath}. ` + + `code=${codeResult.count || 0} prose=${proseResult.count || 0}`); + } + } else { + const result = modeArg === 'code' ? results.code : results.prose; + if (result?.incremental) { + console.log(`SQLite ${modeArg} index updated at ${outPath}. 
+${result.insertedChunks || 0} chunks`); + } else { + console.log(`SQLite ${modeArg} index built at ${outPath}. ${modeArg}=${result?.count || 0}`); + } + } + + await Promise.all(sqliteStateTargets.map((dir) => updateSqliteState(dir, { + enabled: true, + ready: true, + pending: false + }))); + if (hasBuildState) { + await markBuildPhase(indexRoot, 'stage4', 'done'); + } + stopHeartbeat(); + + return { + mode: modeArg, + results, + paths: { + code: codeOutPath, + prose: proseOutPath, + out: outPath + } + }; +} diff --git a/tools/build-sqlite-index/temp-path.js b/tools/build-sqlite-index/temp-path.js new file mode 100644 index 000000000..5849e0fd1 --- /dev/null +++ b/tools/build-sqlite-index/temp-path.js @@ -0,0 +1 @@ +export { createTempPath } from '../build-embeddings/atomic.js'; diff --git a/tools/cache-gc.js b/tools/cache-gc.js index fab4c8fda..380dd221d 100644 --- a/tools/cache-gc.js +++ b/tools/cache-gc.js @@ -2,20 +2,28 @@ import fs from 'node:fs/promises'; import fsSync from 'node:fs'; import path from 'node:path'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; +import { getEnvConfig } from '../src/shared/env.js'; import { getCacheRoot, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; import { isRootPath } from './path-utils.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['dry-run', 'json'], - string: ['max-bytes', 'max-gb', 'max-age-days', 'repo'], - default: { 'dry-run': false, json: false } -}); +const argv = createCli({ + scriptName: 'cache-gc', + options: { + 'dry-run': { type: 'boolean', default: false }, + json: { type: 'boolean', default: false }, + 'max-bytes': { type: 'number' }, + 'max-gb': { type: 'number' }, + 'max-age-days': { type: 'number' }, + repo: { type: 'string' } + } +}).parse(); const rootArg = argv.repo ? 
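runMode above never writes into a live database: both the bundle and artifact paths build into a temp file from createTempPath and only then swap it in via replaceSqliteDatabase (which also exposes a keepBackup option). A reduced sketch of that build-then-swap shape; fs.rename publishes in one step on a single POSIX filesystem, and backup handling lives in the real helper:

import fs from 'node:fs/promises';

const buildAtomically = async (targetPath, buildInto) => {
  const tempPath = `${targetPath}.tmp-${process.pid}`; // naming is illustrative; see createTempPath
  try {
    await buildInto(tempPath);
    await fs.rename(tempPath, targetPath); // publish the finished database in one step
  } catch (err) {
    await fs.rm(tempPath, { force: true }); // never leave a half-built db behind
    throw err;
  }
};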
path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); const userConfig = loadUserConfig(root); -const cacheRoot = (userConfig.cache && userConfig.cache.root) || process.env.PAIROFCLEATS_CACHE_ROOT || getCacheRoot(); +const envConfig = getEnvConfig(); +const cacheRoot = (userConfig.cache && userConfig.cache.root) || envConfig.cacheRoot || getCacheRoot(); const gcConfig = userConfig.cache?.gc || {}; const parseNumber = (value) => { @@ -84,17 +92,21 @@ if (!fsSync.existsSync(repoRoot)) { const entries = await fs.readdir(repoRoot, { withFileTypes: true }); const repos = []; +const needsSizeScan = maxBytes != null; for (const entry of entries) { if (!entry.isDirectory()) continue; const repoPath = path.join(repoRoot, entry.name); const stat = await fs.stat(repoPath); - const bytes = await sizeOfPath(repoPath); - repos.push({ + const repo = { id: entry.name, path: repoPath, - bytes, + bytes: null, mtimeMs: stat.mtimeMs - }); + }; + if (needsSizeScan) { + repo.bytes = await sizeOfPath(repoPath); + } + repos.push(repo); } const removals = []; @@ -123,6 +135,14 @@ if (maxBytes != null) { } } +if (!needsSizeScan && removals.length) { + for (const repo of removals) { + if (!Number.isFinite(repo.bytes)) { + repo.bytes = await sizeOfPath(repo.path); + } + } +} + for (const repo of removals) { if (isRootPath(repo.path)) { console.error(`refusing to delete root path: ${repo.path}`); @@ -132,8 +152,11 @@ for (const repo of removals) { await fs.rm(repo.path, { recursive: true, force: true }); } -const totalBytes = repos.reduce((sum, repo) => sum + repo.bytes, 0); -const freedBytes = removals.reduce((sum, repo) => sum + repo.bytes, 0); +const hasSizeData = repos.some((repo) => Number.isFinite(repo.bytes)); +const totalBytes = hasSizeData + ? repos.reduce((sum, repo) => sum + (Number.isFinite(repo.bytes) ? repo.bytes : 0), 0) + : null; +const freedBytes = removals.reduce((sum, repo) => sum + (Number.isFinite(repo.bytes) ? repo.bytes : 0), 0); const payload = { ok: true, dryRun, diff --git a/tools/ci-build-artifacts.js b/tools/ci-build-artifacts.js index cbc726147..314cbcc7b 100644 --- a/tools/ci-build-artifacts.js +++ b/tools/ci-build-artifacts.js @@ -3,25 +3,27 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; import { spawnSync } from 'node:child_process'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; import simpleGit from 'simple-git'; -import { fileURLToPath } from 'node:url'; -import { getIndexDir, loadUserConfig, resolveRepoRoot, resolveSqlitePaths } from './dict-utils.js'; +import { getIndexDir, getRuntimeConfig, loadUserConfig, resolveRepoRoot, resolveRuntimeEnv, resolveSqlitePaths, resolveToolRoot } from './dict-utils.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['skip-build', 'skip-sqlite', 'incremental'], - string: ['out', 'repo'], - default: { - 'skip-build': false, - 'skip-sqlite': false, - 'incremental': false +const argv = createCli({ + scriptName: 'ci-build', + options: { + 'skip-build': { type: 'boolean', default: false }, + 'skip-sqlite': { type: 'boolean', default: false }, + incremental: { type: 'boolean', default: false }, + out: { type: 'string' }, + repo: { type: 'string' } } -}); +}).parse(); const rootArg = argv.repo ? 
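The cache-gc hunk above makes directory sizing lazy: the recursive walk runs only when a byte budget (max-bytes/max-gb) is in play, and age-based removals backfill sizes afterwards just for the entries being deleted. A reduced sketch of that gating, with sizeOf standing in for the script's sizeOfPath:

const collectRepos = async (dirs, { maxBytes, sizeOf }) => {
  const needsSizeScan = maxBytes != null;
  const repos = [];
  for (const dir of dirs) {
    const repo = { path: dir, bytes: null };
    // Only pay for the recursive walk when a byte budget forces it.
    if (needsSizeScan) repo.bytes = await sizeOf(dir);
    repos.push(repo);
  }
  return repos;
};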
path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); -const scriptRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..'); +const scriptRoot = resolveToolRoot(); const userConfig = loadUserConfig(root); +const runtimeConfig = getRuntimeConfig(root, userConfig); +const baseEnv = resolveRuntimeEnv(runtimeConfig, process.env); const outDir = argv.out ? path.resolve(argv.out) : path.join(root, 'ci-artifacts'); const codeDir = getIndexDir(root, 'code', userConfig); const proseDir = getIndexDir(root, 'prose', userConfig); @@ -34,7 +36,7 @@ const sqlitePaths = resolveSqlitePaths(root, userConfig); * @param {string} label */ function run(cmd, args, label) { - const result = spawnSync(cmd, args, { stdio: 'inherit' }); + const result = spawnSync(cmd, args, { stdio: 'inherit', env: baseEnv }); if (result.status !== 0) { console.error(`Failed: ${label || cmd}`); process.exit(result.status ?? 1); @@ -42,13 +44,13 @@ function run(cmd, args, label) { } if (!argv['skip-build']) { - const args = [path.join(scriptRoot, 'build_index.js')]; + const args = [path.join(scriptRoot, 'build_index.js'), '--repo', root]; if (argv.incremental) args.push('--incremental'); run(process.execPath, args, 'build index'); } if (!argv['skip-sqlite']) { - const args = [path.join(scriptRoot, 'tools', 'build-sqlite-index.js')]; + const args = [path.join(scriptRoot, 'tools', 'build-sqlite-index.js'), '--repo', root]; if (argv.incremental) args.push('--incremental'); run(process.execPath, args, 'build sqlite index'); } diff --git a/tools/ci-restore-artifacts.js b/tools/ci-restore-artifacts.js index 4767036fe..508333d85 100644 --- a/tools/ci-restore-artifacts.js +++ b/tools/ci-restore-artifacts.js @@ -2,15 +2,18 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; import simpleGit from 'simple-git'; import { getIndexDir, loadUserConfig, resolveRepoRoot, resolveSqlitePaths } from './dict-utils.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['force'], - string: ['from', 'repo'], - default: { force: false } -}); +const argv = createCli({ + scriptName: 'ci-restore', + options: { + force: { type: 'boolean', default: false }, + from: { type: 'string' }, + repo: { type: 'string' } + } +}).parse(); const rootArg = argv.repo ? 
path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); diff --git a/tools/clean-artifacts.js b/tools/clean-artifacts.js index aabba5939..7be9054ca 100644 --- a/tools/clean-artifacts.js +++ b/tools/clean-artifacts.js @@ -2,20 +2,25 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; -import minimist from 'minimist'; -import { getCacheRoot, getRepoCacheRoot, loadUserConfig, resolveRepoRoot, resolveSqlitePaths } from './dict-utils.js'; +import { createCli } from '../src/shared/cli.js'; +import { getEnvConfig } from '../src/shared/env.js'; +import { getCacheRoot, getRepoCacheRoot, loadUserConfig, resolveLmdbPaths, resolveRepoRoot, resolveSqlitePaths } from './dict-utils.js'; import { isInside, isRootPath } from './path-utils.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['all', 'dry-run'], - string: ['repo'], - default: { all: false, 'dry-run': false } -}); +const argv = createCli({ + scriptName: 'clean-artifacts', + options: { + all: { type: 'boolean', default: false }, + 'dry-run': { type: 'boolean', default: false }, + repo: { type: 'string' } + } +}).parse(); const rootArg = argv.repo ? path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); const userConfig = loadUserConfig(root); -const cacheRoot = (userConfig.cache && userConfig.cache.root) || process.env.PAIROFCLEATS_CACHE_ROOT || getCacheRoot(); +const envConfig = getEnvConfig(); +const cacheRoot = (userConfig.cache && userConfig.cache.root) || envConfig.cacheRoot || getCacheRoot(); const repoCacheRoot = getRepoCacheRoot(root, userConfig); const defaultSqliteDir = path.join(repoCacheRoot, 'index-sqlite'); const legacyRepoSqliteDir = path.join(root, 'index-sqlite'); @@ -23,6 +28,7 @@ const defaultCodePath = path.join(defaultSqliteDir, 'index-code.db'); const defaultProsePath = path.join(defaultSqliteDir, 'index-prose.db'); const defaultLegacyPath = path.join(defaultSqliteDir, 'index.db'); const sqlitePaths = resolveSqlitePaths(root, userConfig); +const lmdbPaths = resolveLmdbPaths(root, userConfig); const targets = []; @@ -60,6 +66,14 @@ if (fs.existsSync(legacyRepoSqliteDir) && !isInside(base, path.resolve(legacyRep targets.push(legacyRepoSqliteDir); } +const lmdbDirs = [lmdbPaths.codePath, lmdbPaths.prosePath]; +for (const dir of lmdbDirs) { + if (!dir || !fs.existsSync(dir)) continue; + if (!isInside(base, path.resolve(dir))) { + targets.push(dir); + } +} + const uniqueTargets = Array.from(new Set(targets.map((target) => path.resolve(target)))); for (const target of uniqueTargets) { if (!fs.existsSync(target)) { diff --git a/tools/cli-utils.js b/tools/cli-utils.js index 4f7974ef0..f100c1437 100644 --- a/tools/cli-utils.js +++ b/tools/cli-utils.js @@ -1,4 +1,4 @@ -import { spawnSync } from 'node:child_process'; +import { execaSync } from 'execa'; /** * Run a command and return a normalized result. 
@@ -8,10 +8,10 @@ import { spawnSync } from 'node:child_process'; * @returns {{ok:boolean,status:number|null,stdout?:string,stderr?:string}} */ export function runCommand(cmd, args, options = {}) { - const result = spawnSync(cmd, args, options); + const result = execaSync(cmd, args, { reject: false, ...options }); return { - ok: result.status === 0, - status: result.status, + ok: result.exitCode === 0, + status: result.exitCode, stdout: result.stdout, stderr: result.stderr }; diff --git a/tools/combined-summary.js b/tools/combined-summary.js index 5d3365f29..b3acae9c8 100644 --- a/tools/combined-summary.js +++ b/tools/combined-summary.js @@ -3,27 +3,41 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; import { spawnSync } from 'node:child_process'; -import minimist from 'minimist'; -import { fileURLToPath } from 'node:url'; -import { resolveAnnSetting, resolveBaseline, resolveCompareModels } from '../src/compare/config.js'; -import { DEFAULT_MODEL_ID, getIndexDir, loadUserConfig, resolveRepoRoot, resolveSqlitePaths } from './dict-utils.js'; +import { createCli } from '../src/shared/cli.js'; +import { resolveAnnSetting, resolveBaseline, resolveCompareModels } from '../src/experimental/compare/config.js'; +import { DEFAULT_MODEL_ID, getIndexDir, getRuntimeConfig, loadUserConfig, resolveRepoRoot, resolveRuntimeEnv, resolveSqlitePaths, resolveToolRoot } from './dict-utils.js'; const rawArgs = process.argv.slice(2); -const argv = minimist(rawArgs, { - boolean: ['json', 'build', 'ann', 'no-ann', 'incremental'], - string: ['models', 'baseline', 'queries', 'out', 'top', 'limit', 'mode', 'repo'], - default: { - json: false, - build: true, - top: 5, - limit: 0 +const argv = createCli({ + scriptName: 'summary-report', + options: { + json: { type: 'boolean', default: false }, + build: { type: 'boolean', default: true }, + ann: { type: 'boolean' }, + 'no-ann': { type: 'boolean' }, + incremental: { type: 'boolean', default: false }, + models: { type: 'string' }, + baseline: { type: 'string' }, + queries: { type: 'string' }, + out: { type: 'string' }, + top: { type: 'number', default: 5 }, + limit: { type: 'number', default: 0 }, + mode: { type: 'string' }, + repo: { type: 'string' }, + profile: { type: 'string' } } -}); +}).parse(); const rootArg = argv.repo ? path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); -const userConfig = loadUserConfig(root); -const scriptRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..'); +const userConfig = loadUserConfig(root, { profile: argv.profile }); +if (userConfig.profile !== 'full') { + console.error('summary-report is experimental. Run with profile=full or set PAIROFCLEATS_PROFILE=full.'); + process.exit(1); +} +const runtimeConfig = getRuntimeConfig(root, userConfig); +const baseEnv = resolveRuntimeEnv(runtimeConfig, process.env); +const scriptRoot = resolveToolRoot(); const configCompare = Array.isArray(userConfig.models?.compare) ? userConfig.models.compare : []; const defaultModel = userConfig.models?.id || DEFAULT_MODEL_ID; @@ -61,7 +75,7 @@ const reportPaths = { * @returns {void} */ function runNode(args, label) { - const result = spawnSync(process.execPath, args, { stdio: 'inherit' }); + const result = spawnSync(process.execPath, args, { stdio: 'inherit', cwd: root, env: baseEnv }); if (result.status !== 0) { console.error(`Failed: ${label}`); process.exit(result.status ?? 
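+// Children run with cwd=root and the resolved runtime env, so --repo and profile settings reach every build step.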
1); @@ -95,7 +109,7 @@ function ensureParityIndexes() { console.error('Index missing for parity. Re-run with --build.'); process.exit(1); } - const args = [path.join(scriptRoot, 'build_index.js')]; + const args = [path.join(scriptRoot, 'build_index.js'), '--repo', root]; if (argv.incremental) args.push('--incremental'); runNode(args, 'build index'); } @@ -107,7 +121,7 @@ function ensureParityIndexes() { console.error('SQLite index missing for parity. Re-run with --build.'); process.exit(1); } - const args = [path.join(scriptRoot, 'tools', 'build-sqlite-index.js')]; + const args = [path.join(scriptRoot, 'tools', 'build-sqlite-index.js'), '--repo', root]; if (argv.incremental) args.push('--incremental'); runNode(args, 'build sqlite index'); } @@ -118,9 +132,11 @@ function ensureParityIndexes() { * @param {{backend?:string,outPath:string}} params * @returns {string[]} */ -function buildCompareArgs({ backend, outPath }) { +function buildCompareArgs({ backend, outPath, buildIndex, buildSqlite }) { const args = [ path.join(scriptRoot, 'tools', 'compare-models.js'), + '--repo', + root, '--models', models.join(','), '--baseline', @@ -134,8 +150,8 @@ function buildCompareArgs({ backend, outPath }) { if (argv.limit) args.push('--limit', String(argv.limit)); if (argv.mode) args.push('--mode', argv.mode); if (!annEnabled) args.push('--no-ann'); - if (buildEnabled) args.push('--build'); - if (buildEnabled && backend === 'sqlite') args.push('--build-sqlite'); + if (buildIndex) args.push('--build'); + if (buildSqlite) args.push('--build-sqlite'); if (argv.incremental) args.push('--incremental'); return args; } @@ -164,8 +180,14 @@ function buildParityArgs({ backend, outPath }) { return args; } -runNode(buildCompareArgs({ outPath: reportPaths.compareMemory }), 'compare models (memory)'); -runNode(buildCompareArgs({ outPath: reportPaths.compareSqlite, backend: 'sqlite' }), 'compare models (sqlite)'); +runNode( + buildCompareArgs({ outPath: reportPaths.compareMemory, buildIndex: buildEnabled, buildSqlite: false }), + 'compare models (memory)' +); +runNode( + buildCompareArgs({ outPath: reportPaths.compareSqlite, backend: 'sqlite', buildIndex: false, buildSqlite: buildEnabled }), + 'compare models (sqlite)' +); ensureParityIndexes(); runNode(buildParityArgs({ backend: 'sqlite', outPath: reportPaths.paritySqlite }), 'parity sqlite'); diff --git a/tools/compact-pieces.js b/tools/compact-pieces.js new file mode 100644 index 000000000..28de08f68 --- /dev/null +++ b/tools/compact-pieces.js @@ -0,0 +1,325 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import path from 'node:path'; +import readline from 'node:readline'; +import { createCli } from '../src/shared/cli.js'; +import { writeJsonLinesFile, writeJsonObjectFile } from '../src/shared/json-stream.js'; +import { checksumFile } from '../src/shared/hash.js'; +import { getIndexDir, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'compact-pieces', + options: { + repo: { type: 'string' }, + mode: { type: 'string', default: 'code' }, + 'chunk-meta-size': { type: 'number' }, + 'token-postings-size': { type: 'number' }, + 'dry-run': { type: 'boolean', default: false } + } +}).parse(); + +const rootArg = argv.repo ? path.resolve(argv.repo) : null; +const root = rootArg || resolveRepoRoot(process.cwd()); +const userConfig = loadUserConfig(root); +const modeArg = (argv.mode || 'code').toLowerCase(); +const modes = modeArg === 'all' ? 
['code', 'prose'] : [modeArg]; +const dryRun = argv['dry-run'] === true; + +const listShardFiles = (dir, prefix) => { + if (!fsSync.existsSync(dir)) return []; + return fsSync + .readdirSync(dir) + .filter((name) => name.startsWith(prefix) && name.endsWith('.jsonl')) + .sort() + .map((name) => path.join(dir, name)); +}; + +const readJsonLinesFile = async (filePath, onEntry) => { + const stream = fsSync.createReadStream(filePath, { encoding: 'utf8' }); + const rl = readline.createInterface({ input: stream, crlfDelay: Infinity }); + for await (const line of rl) { + const trimmed = line.trim(); + if (!trimmed) continue; + const result = onEntry(JSON.parse(trimmed)); + if (result && typeof result.then === 'function') { + await result; + } + } +}; + +const readJson = async (filePath) => JSON.parse(await fs.readFile(filePath, 'utf8')); + +const resolveChunkMetaParts = async (indexDir) => { + const metaPath = path.join(indexDir, 'chunk_meta.meta.json'); + const partsDir = path.join(indexDir, 'chunk_meta.parts'); + if (!fsSync.existsSync(metaPath) && !fsSync.existsSync(partsDir)) return null; + let parts = []; + let metaFields = null; + if (fsSync.existsSync(metaPath)) { + const meta = await readJson(metaPath); + metaFields = meta.fields || meta; + if (Array.isArray(metaFields.parts)) { + parts = metaFields.parts.map((name) => path.join(indexDir, name)); + } + } + if (!parts.length) { + parts = listShardFiles(partsDir, 'chunk_meta.part-'); + } + if (!parts.length) return null; + return { metaPath, partsDir, parts, metaFields }; +}; + +const resolveTokenPostingsParts = async (indexDir) => { + const metaPath = path.join(indexDir, 'token_postings.meta.json'); + const shardsDir = path.join(indexDir, 'token_postings.shards'); + if (!fsSync.existsSync(metaPath) && !fsSync.existsSync(shardsDir)) return null; + let parts = []; + let metaFields = null; + let metaArrays = null; + if (fsSync.existsSync(metaPath)) { + const meta = await readJson(metaPath); + metaFields = meta.fields || meta; + metaArrays = meta.arrays || meta; + if (Array.isArray(metaFields.parts)) { + parts = metaFields.parts.map((name) => path.join(indexDir, name)); + } + } + // The meta file can outlive the shards dir after a partial cleanup; guard the directory scan. + if (!parts.length && fsSync.existsSync(shardsDir)) { + parts = fsSync + .readdirSync(shardsDir) + .filter((name) => name.startsWith('token_postings.part-') && name.endsWith('.json')) + .sort() + .map((name) => path.join(shardsDir, name)); + } + if (!parts.length) return null; + return { metaPath, shardsDir, parts, metaFields, metaArrays }; +}; + +const appendAudit = async (indexDir, line) => { + if (dryRun) return; + const piecesDir = path.join(indexDir, 'pieces'); + await fs.mkdir(piecesDir, { recursive: true }); + const logPath = path.join(piecesDir, 'compaction.log'); + await fs.appendFile(logPath, `${line}\n`); +}; + +const compactChunkMeta = async (indexDir, targetSize) => { + const resolved = await resolveChunkMetaParts(indexDir); + if (!resolved) return null; + const { metaPath, partsDir, parts, metaFields } = resolved; + const totalChunks = Number.isFinite(metaFields?.totalChunks) ? metaFields.totalChunks : null; + const target = Number.isFinite(Number(targetSize)) && Number(targetSize) > 0 + ? Math.floor(Number(targetSize)) + : (Number.isFinite(metaFields?.shardSize) ? 
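+// Target precedence: explicit --chunk-meta-size, else the shardSize recorded in chunk_meta.meta.json, else 100000 entries per part.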
metaFields.shardSize : 100000); + if (parts.length <= 1 || target <= 0) return null; + + const tmpDir = path.join(indexDir, 'chunk_meta.parts.compact'); + if (!dryRun) { + await fs.rm(tmpDir, { recursive: true, force: true }); + await fs.mkdir(tmpDir, { recursive: true }); + } + const newParts = []; + const newCounts = []; + let buffer = []; + let partIndex = 0; + let total = 0; + const flush = async () => { + if (!buffer.length) return; + const name = `chunk_meta.part-${String(partIndex).padStart(5, '0')}.jsonl`; + const relPath = path.join('chunk_meta.parts', name).split(path.sep).join('/'); + const outPath = path.join(tmpDir, name); + if (!dryRun) { + await writeJsonLinesFile(outPath, buffer, { atomic: true }); + } + newParts.push(relPath); + newCounts.push(buffer.length); + total += buffer.length; + buffer = []; + partIndex += 1; + }; + + for (const partPath of parts) { + await readJsonLinesFile(partPath, async (entry) => { + buffer.push(entry); + if (buffer.length >= target) { + await flush(); + } + }); + } + await flush(); + if (!dryRun) { + await fs.rm(partsDir, { recursive: true, force: true }); + await fs.rename(tmpDir, partsDir); + await writeJsonObjectFile(metaPath, { + fields: { + format: 'jsonl', + shardSize: target, + totalChunks: totalChunks ?? total, + parts: newParts + }, + atomic: true + }); + } + return { type: 'chunks', name: 'chunk_meta', metaName: 'chunk_meta_meta', parts: newParts, counts: newCounts }; +}; + +const compactTokenPostings = async (indexDir, targetSize) => { + const resolved = await resolveTokenPostingsParts(indexDir); + if (!resolved) return null; + const { metaPath, shardsDir, parts, metaFields, metaArrays } = resolved; + const target = Number.isFinite(Number(targetSize)) && Number(targetSize) > 0 + ? Math.floor(Number(targetSize)) + : (Number.isFinite(metaFields?.shardSize) ? metaFields.shardSize : 50000); + if (parts.length <= 1 || target <= 0) return null; + + const tmpDir = path.join(indexDir, 'token_postings.shards.compact'); + if (!dryRun) { + await fs.rm(tmpDir, { recursive: true, force: true }); + await fs.mkdir(tmpDir, { recursive: true }); + } + const newParts = []; + const newCounts = []; + let vocabBuffer = []; + let postingsBuffer = []; + let partIndex = 0; + const flush = async () => { + if (!vocabBuffer.length) return; + const name = `token_postings.part-${String(partIndex).padStart(5, '0')}.json`; + const relPath = path.join('token_postings.shards', name).split(path.sep).join('/'); + const outPath = path.join(tmpDir, name); + if (!dryRun) { + await writeJsonObjectFile(outPath, { + arrays: { vocab: vocabBuffer, postings: postingsBuffer }, + atomic: true + }); + } + newParts.push(relPath); + newCounts.push(vocabBuffer.length); + vocabBuffer = []; + postingsBuffer = []; + partIndex += 1; + }; + + for (const partPath of parts) { + const shard = await readJson(partPath); + const vocab = Array.isArray(shard?.vocab) ? shard.vocab : (Array.isArray(shard?.arrays?.vocab) ? shard.arrays.vocab : []); + const postings = Array.isArray(shard?.postings) ? shard.postings : (Array.isArray(shard?.arrays?.postings) ? shard.arrays.postings : []); + for (let i = 0; i < vocab.length; i++) { + vocabBuffer.push(vocab[i]); + postingsBuffer.push(postings[i] || []); + if (vocabBuffer.length >= target) { + await flush(); + } + } + } + await flush(); + const docLengths = Array.isArray(metaArrays?.docLengths) ? metaArrays.docLengths : []; + const totalDocs = Number.isFinite(metaFields?.totalDocs) ? 
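+// Carry ranking stats through compaction: totalDocs and avgDocLen come from the old meta when present, otherwise they are recomputed from docLengths.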
metaFields.totalDocs : docLengths.length; + const avgDocLen = Number.isFinite(metaFields?.avgDocLen) + ? metaFields.avgDocLen + : (docLengths.length + ? docLengths.reduce((sum, len) => sum + (Number.isFinite(len) ? len : 0), 0) / docLengths.length + : 0); + const vocabCount = newCounts.reduce((sum, count) => sum + count, 0); + if (!dryRun) { + await fs.rm(shardsDir, { recursive: true, force: true }); + await fs.rename(tmpDir, shardsDir); + await writeJsonObjectFile(metaPath, { + fields: { + avgDocLen, + totalDocs, + format: 'sharded', + shardSize: target, + vocabCount, + parts: newParts + }, + arrays: { docLengths }, + atomic: true + }); + } + return { type: 'postings', name: 'token_postings', metaName: 'token_postings_meta', parts: newParts, counts: newCounts }; +}; + +const updateManifest = async (indexDir, updates) => { + if (!updates?.length) return; + const manifestPath = path.join(indexDir, 'pieces', 'manifest.json'); + if (!fsSync.existsSync(manifestPath)) return; + const manifestRaw = await readJson(manifestPath); + const fields = manifestRaw.fields || manifestRaw; + const pieces = Array.isArray(fields.pieces) ? fields.pieces : []; + const removeNames = new Set(); + updates.forEach((update) => { + removeNames.add(update.name); + removeNames.add(update.metaName); + }); + const retained = pieces.filter((piece) => !removeNames.has(piece?.name)); + const newPieces = [...retained]; + for (const update of updates) { + for (let i = 0; i < update.parts.length; i++) { + const relPath = update.parts[i]; + const absPath = path.join(indexDir, relPath.split('/').join(path.sep)); + const stat = await fs.stat(absPath); + const result = await checksumFile(absPath); + const checksum = result?.value || null; + const checksumAlgo = result?.algo || null; + newPieces.push({ + type: update.type, + name: update.name, + format: update.type === 'chunks' ? 'jsonl' : 'json', + count: update.counts[i], + path: relPath, + bytes: stat.size, + checksum: checksum && checksumAlgo ? `${checksumAlgo}:${checksum}` : null + }); + } + const metaRel = update.type === 'chunks' ? 'chunk_meta.meta.json' : 'token_postings.meta.json'; + const metaAbs = path.join(indexDir, metaRel); + if (fsSync.existsSync(metaAbs)) { + const stat = await fs.stat(metaAbs); + const result = await checksumFile(metaAbs); + const checksum = result?.value || null; + const checksumAlgo = result?.algo || null; + newPieces.push({ + type: update.type, + name: update.metaName, + format: 'json', + count: null, + path: metaRel, + bytes: stat.size, + checksum: checksum && checksumAlgo ? 
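+// Checksums are stored as `algo:value` strings so manifest readers can verify pieces without assuming a hash function.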
`${checksumAlgo}:${checksum}` : null + }); + } + } + fields.pieces = newPieces; + fields.generatedAt = new Date().toISOString(); + if (!dryRun) { + await fs.mkdir(path.join(indexDir, 'pieces'), { recursive: true }); + await writeJsonObjectFile(manifestPath, { fields, atomic: true }); + } +}; + +for (const mode of modes) { + const indexDir = getIndexDir(root, mode, userConfig); + const chunkMetaTarget = argv['chunk-meta-size']; + const tokenPostingsTarget = argv['token-postings-size']; + const updates = []; + const chunkUpdate = await compactChunkMeta(indexDir, chunkMetaTarget); + if (chunkUpdate) { + updates.push(chunkUpdate); + await appendAudit(indexDir, `${new Date().toISOString()} chunk_meta compacted: parts=${chunkUpdate.parts.length}`); + } + const tokenUpdate = await compactTokenPostings(indexDir, tokenPostingsTarget); + if (tokenUpdate) { + updates.push(tokenUpdate); + await appendAudit(indexDir, `${new Date().toISOString()} token_postings compacted: parts=${tokenUpdate.parts.length}`); + } + if (updates.length) { + // Dry runs never write the compacted part files, so skip the manifest stat/checksum pass. + if (!dryRun) { + await updateManifest(indexDir, updates); + } + console.log(`[pieces] ${mode}: compaction ${dryRun ? 'planned' : 'complete'}.`); + } else { + console.log(`[pieces] ${mode}: no compaction needed.`); + } +}
diff --git a/tools/compact-sqlite-index.js b/tools/compact-sqlite-index.js index 15bf3c90a..0ac762fd4 100644 --- a/tools/compact-sqlite-index.js +++ b/tools/compact-sqlite-index.js @@ -3,12 +3,12 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; import { pathToFileURL } from 'node:url'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; import { loadUserConfig, resolveRepoRoot, resolveSqlitePaths } from './dict-utils.js'; import { encodeVector, ensureVectorTable, getVectorExtensionConfig, hasVectorTable, loadVectorExtension } from './vector-extension.js'; -import { CREATE_TABLES_SQL, REQUIRED_TABLES, SCHEMA_VERSION } from '../src/sqlite/schema.js'; -import { hasRequiredTables, normalizeFilePath } from '../src/sqlite/utils.js'; -import { dequantizeUint8ToFloat32, toVectorId } from '../src/sqlite/vector.js'; +import { CREATE_TABLES_SQL, REQUIRED_TABLES, SCHEMA_VERSION } from '../src/storage/sqlite/schema.js'; +import { hasRequiredTables, normalizeFilePath, replaceSqliteDatabase } from '../src/storage/sqlite/utils.js'; +import { dequantizeUint8ToFloat32, toVectorId } from '../src/storage/sqlite/vector.js'; let Database; try { @@ -111,21 +111,21 @@ export async function compactDatabase(input) { const insertChunk = outDb.prepare(` INSERT OR REPLACE INTO chunks ( - id, mode, file, start, end, startLine, endLine, ext, kind, name, headline, - preContext, postContext, weight, tokens, ngrams, codeRelations, docmeta, - stats, complexity, lint, externalDocs, last_modified, last_author, churn, - chunk_authors + id, chunk_id, mode, file, start, end, startLine, endLine, ext, kind, name, + headline, preContext, postContext, weight, tokens, ngrams, codeRelations, + docmeta, stats, complexity, lint, externalDocs, last_modified, last_author, + churn, chunk_authors ) VALUES ( - @id, @mode, @file, @start, @end, @startLine, @endLine, @ext, @kind, @name, @headline, - @preContext, @postContext, @weight, @tokens, @ngrams, @codeRelations, @docmeta, - @stats, @complexity, @lint, @externalDocs, @last_modified, @last_author, @churn, - @chunk_authors + @id, @chunk_id, @mode, @file, @start, @end, @startLine, @endLine, @ext, @kind, + @name, @headline, @preContext, @postContext, @weight, 
@tokens, @ngrams, + @codeRelations, @docmeta, @stats, @complexity, @lint, @externalDocs, + @last_modified, @last_author, @churn, @chunk_authors ); `); const insertFts = outDb.prepare(` - INSERT OR REPLACE INTO chunks_fts (rowid, mode, file, name, kind, headline, tokens) - VALUES (@id, @mode, @file, @name, @kind, @headline, @tokensText); + INSERT OR REPLACE INTO chunks_fts (rowid, mode, file, name, signature, kind, headline, doc, tokens) + VALUES (@id, @mode, @file, @name, @signature, @kind, @headline, @doc, @tokensText); `); const insertTokenVocab = outDb.prepare( @@ -196,13 +196,24 @@ export async function compactDatabase(input) { insertChunk.run(chunkRow); const tokensText = parseTokens(row.tokens).join(' '); + let signature = null; + let doc = null; + if (row.docmeta) { + try { + const meta = JSON.parse(row.docmeta); + signature = typeof meta?.signature === 'string' ? meta.signature : null; + doc = typeof meta?.doc === 'string' ? meta.doc : null; + } catch {} + } insertFts.run({ id: newId, mode, file: normalizedFile, name: row.name, + signature, kind: row.kind, headline: row.headline, + doc, tokensText }); @@ -390,28 +401,22 @@ export async function compactDatabase(input) { if (!keepBackup && fs.existsSync(backupPath)) { await fsPromises.rm(backupPath, { force: true }); } - - await fsPromises.rename(dbPath, backupPath); - await fsPromises.rename(tempPath, dbPath); - - if (!keepBackup) { - await fsPromises.rm(backupPath, { force: true }); - } + await replaceSqliteDatabase(tempPath, dbPath, { keepBackup, backupPath }); return { skipped: false }; } const isDirectRun = import.meta.url === pathToFileURL(process.argv[1]).href; if (isDirectRun) { - const argv = minimist(process.argv.slice(2), { - string: ['mode', 'repo'], - boolean: ['dry-run', 'keep-backup'], - default: { - mode: 'all', - 'dry-run': false, - 'keep-backup': false + const argv = createCli({ + scriptName: 'compact-sqlite-index', + options: { + mode: { type: 'string', default: 'all' }, + repo: { type: 'string' }, + 'dry-run': { type: 'boolean', default: false }, + 'keep-backup': { type: 'boolean', default: false } } - }); + }).parse(); const rootArg = argv.repo ? 
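+// Usage sketch: node tools/compact-sqlite-index.js --mode all --dry-run --keep-backup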
path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); diff --git a/tools/compare-models.js b/tools/compare-models.js index a9c13e721..c934c5031 100644 --- a/tools/compare-models.js +++ b/tools/compare-models.js @@ -3,46 +3,75 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; import crypto from 'node:crypto'; -import { spawnSync } from 'node:child_process'; -import { fileURLToPath } from 'node:url'; -import minimist from 'minimist'; -import { resolveAnnSetting, resolveBaseline, resolveCompareModels } from '../src/compare/config.js'; +import { execaSync } from 'execa'; +import { createCli } from '../src/shared/cli.js'; +import { getEnvConfig } from '../src/shared/env.js'; +import { resolveAnnSetting, resolveBaseline, resolveCompareModels } from '../src/experimental/compare/config.js'; import { DEFAULT_MODEL_ID, getCacheRoot, getDictConfig, getModelConfig, getRepoId, + getRuntimeConfig, loadUserConfig, resolveRepoRoot, - resolveSqlitePaths + resolveRuntimeEnv, + resolveSqlitePaths, + resolveToolRoot } from './dict-utils.js'; const rawArgs = process.argv.slice(2); -const argv = minimist(rawArgs, { - boolean: ['json', 'build', 'build-index', 'build-sqlite', 'incremental', 'stub-embeddings', 'ann', 'no-ann'], - string: ['models', 'baseline', 'queries', 'backend', 'out', 'mode', 'cache-root', 'repo'], - alias: { n: 'top', q: 'queries' }, - default: { top: 5, limit: 0 } -}); +const argv = createCli({ + scriptName: 'compare-models', + options: { + json: { type: 'boolean', default: false }, + build: { type: 'boolean', default: false }, + 'build-index': { type: 'boolean', default: false }, + 'build-sqlite': { type: 'boolean', default: false }, + incremental: { type: 'boolean', default: false }, + 'stub-embeddings': { type: 'boolean', default: false }, + ann: { type: 'boolean' }, + 'no-ann': { type: 'boolean' }, + models: { type: 'string' }, + baseline: { type: 'string' }, + queries: { type: 'string' }, + backend: { type: 'string' }, + out: { type: 'string' }, + mode: { type: 'string' }, + 'cache-root': { type: 'string' }, + repo: { type: 'string' }, + profile: { type: 'string' }, + top: { type: 'number', default: 5 }, + limit: { type: 'number', default: 0 } + }, + aliases: { n: 'top', q: 'queries' } +}).parse(); const rootArg = argv.repo ? path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); -const scriptRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..'); -const userConfig = loadUserConfig(root); +const scriptRoot = resolveToolRoot(); +const userConfig = loadUserConfig(root, { profile: argv.profile }); +if (userConfig.profile !== 'full') { + console.error('compare-models is experimental. Run with profile=full or set PAIROFCLEATS_PROFILE=full.'); + process.exit(1); +} +const envConfig = getEnvConfig(); +const runtimeConfig = getRuntimeConfig(root, userConfig); +const baseEnv = resolveRuntimeEnv(runtimeConfig, process.env); const configCacheRoot = typeof userConfig.cache?.root === 'string' && userConfig.cache.root.trim() ? path.resolve(userConfig.cache.root) : null; const cacheRootBase = argv['cache-root'] ? path.resolve(argv['cache-root']) - : (process.env.PAIROFCLEATS_CACHE_ROOT - ? path.resolve(process.env.PAIROFCLEATS_CACHE_ROOT) + : (envConfig.cacheRoot + ? 
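+// Cache-root precedence: the --cache-root flag, then the PAIROFCLEATS_CACHE_ROOT env override, then the built-in default.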
path.resolve(envConfig.cacheRoot) : getCacheRoot()); const repoId = getRepoId(root); const modelConfig = getModelConfig(root, userConfig); const dictConfig = getDictConfig(root, userConfig); -const sharedModelsDir = process.env.PAIROFCLEATS_MODELS_DIR || modelConfig.dir; -const sharedDictDir = process.env.PAIROFCLEATS_DICT_DIR || dictConfig.dir; +const sharedModelsDir = envConfig.modelsDir || modelConfig.dir; +const sharedDictDir = envConfig.dictDir || dictConfig.dir; const configCompareModels = Array.isArray(userConfig.models?.compare) ? userConfig.models.compare @@ -125,7 +154,7 @@ function getModelCacheRoot(modelId) { */ function buildEnv(modelId, modelCacheRoot) { const env = { - ...process.env, + ...baseEnv, PAIROFCLEATS_MODEL: modelId }; if (modelCacheRoot) env.PAIROFCLEATS_CACHE_ROOT = modelCacheRoot; @@ -142,7 +171,38 @@ function buildEnv(modelId, modelCacheRoot) { * @returns {boolean} */ function indexExists(modelCacheRoot, mode) { - const metaPath = path.join(modelCacheRoot, 'repos', repoId, `index-${mode}`, 'chunk_meta.json'); + const repoCacheRoot = path.join(modelCacheRoot, 'repos', repoId); + let indexRoot = repoCacheRoot; + const currentPath = path.join(repoCacheRoot, 'builds', 'current.json'); + if (fs.existsSync(currentPath)) { + try { + const data = JSON.parse(fs.readFileSync(currentPath, 'utf8')) || {}; + const resolveRoot = (value) => { + if (!value) return null; + return path.isAbsolute(value) ? value : path.join(repoCacheRoot, value); + }; + const buildId = typeof data.buildId === 'string' ? data.buildId : null; + const buildRootRaw = typeof data.buildRoot === 'string' ? data.buildRoot : null; + const buildRoot = buildRootRaw + ? resolveRoot(buildRootRaw) + : (buildId ? path.join(repoCacheRoot, 'builds', buildId) : null); + let modeRoot = null; + if (data.buildRoots && typeof data.buildRoots === 'object' && !Array.isArray(data.buildRoots)) { + const raw = data.buildRoots[mode]; + if (typeof raw === 'string') { + modeRoot = resolveRoot(raw); + } + } else if (buildRoot && Array.isArray(data.modes) && data.modes.includes(mode)) { + modeRoot = buildRoot; + } + if (modeRoot && fs.existsSync(modeRoot)) { + indexRoot = modeRoot; + } else if (buildRoot && fs.existsSync(buildRoot)) { + indexRoot = buildRoot; + } + } catch {} + } + const metaPath = path.join(indexRoot, `index-${mode}`, 'chunk_meta.json'); return fs.existsSync(metaPath); } @@ -166,11 +226,11 @@ function ensureIndex(modelCacheRoot) { * @param {string} label */ function runCommand(args, env, label) { - const stdio = argv.json ? ['ignore', process.stderr, process.stderr] : 'inherit'; - const result = spawnSync(process.execPath, args, { env, stdio }); - if (result.status !== 0) { + const stdio = argv.json ? ['ignore', 'ignore', 'ignore'] : 'inherit'; + const result = execaSync(process.execPath, args, { env, stdio, reject: false }); + if (result.exitCode !== 0) { console.error(`Failed: ${label}`); - process.exit(result.status ?? 1); + process.exit(result.exitCode ?? 
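+// Mirror the child's exit code, defaulting to 1 when execa reports none.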
1); } } @@ -190,18 +250,20 @@ function runSearch(query, env) { backend, '-n', String(topN), - annArg + annArg, + '--repo', + root ]; if (modeArg && modeArg !== 'both') { args.push('--mode', modeArg); } const start = Date.now(); - const result = spawnSync(process.execPath, args, { env, encoding: 'utf8' }); + const result = execaSync(process.execPath, args, { env, encoding: 'utf8', reject: false }); const wallMs = Date.now() - start; - if (result.status !== 0) { + if (result.exitCode !== 0) { console.error(`Search failed for query="${query}" (model=${env.PAIROFCLEATS_MODEL})`); if (result.stderr) console.error(result.stderr.trim()); - process.exit(result.status ?? 1); + process.exit(result.exitCode ?? 1); } const payload = JSON.parse(result.stdout || '{}'); return { payload, wallMs }; @@ -248,9 +310,8 @@ const limit = Math.max(0, parseInt(argv.limit, 10) || 0); const selectedQueries = limit > 0 ? queries.slice(0, limit) : queries; if (sqliteBackend && buildSqlite) { - const sqlitePaths = resolveSqlitePaths(root, userConfig); - if (!buildIndex && !fs.existsSync(sqlitePaths.codePath) && !fs.existsSync(sqlitePaths.prosePath)) { - console.error('SQLite index missing. Use --build or build the indexes first.'); + if (!buildIndex && !ensureIndex(getModelCacheRoot(models[0]))) { + console.error('Index missing. Use --build or build the index first.'); process.exit(1); } } @@ -266,7 +327,7 @@ for (const modelId of models) { } if (buildIndex) { - const args = [path.join(scriptRoot, 'build_index.js')]; + const args = [path.join(scriptRoot, 'build_index.js'), '--repo', root]; if (buildIncremental) args.push('--incremental'); if (stubEmbeddings) args.push('--stub-embeddings'); runCommand(args, env, `build index (${modelId})`); @@ -276,7 +337,7 @@ for (const modelId of models) { } if (buildSqlite) { - const args = [path.join(scriptRoot, 'tools', 'build-sqlite-index.js')]; + const args = [path.join(scriptRoot, 'tools', 'build-sqlite-index.js'), '--repo', root]; if (buildIncremental) args.push('--incremental'); runCommand(args, env, `build sqlite (${modelId})`); } else if (sqliteBackend) { diff --git a/tools/config-dump.js b/tools/config-dump.js new file mode 100644 index 000000000..9e45e42ca --- /dev/null +++ b/tools/config-dump.js @@ -0,0 +1,70 @@ +#!/usr/bin/env node +import path from 'node:path'; +import { createCli } from '../src/shared/cli.js'; +import { getEnvConfig } from '../src/shared/env.js'; +import { + getCacheRoot, + getCacheRuntimeConfig, + getModelConfig, + getRepoCacheRoot, + getRuntimeConfig, + getToolingConfig, + loadUserConfig, + resolveLmdbPaths, + resolveRepoRoot, + resolveSqlitePaths +} from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'config-dump', + options: { + repo: { type: 'string' }, + json: { type: 'boolean', default: false } + } +}).parse(); + +const rootArg = argv.repo ? path.resolve(argv.repo) : null; +const repoRoot = rootArg || resolveRepoRoot(process.cwd()); +const userConfig = loadUserConfig(repoRoot); +const envConfig = getEnvConfig(); + +const runtimeConfig = getRuntimeConfig(repoRoot, userConfig); +const parsedUv = Number(process.env.UV_THREADPOOL_SIZE); +const effectiveUvThreadpoolSize = Number.isFinite(parsedUv) && parsedUv > 0 ? 
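+// Only a positive integer UV_THREADPOOL_SIZE counts as an override; anything else is reported as the libuv default.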
Math.floor(parsedUv) : null; + + +const cacheRoot = (userConfig.cache && userConfig.cache.root) || envConfig.cacheRoot || getCacheRoot(); +const payload = { + repoRoot, + profile: userConfig.profile || null, + env: envConfig, + userConfig, + derived: { + cacheRoot, + repoCacheRoot: getRepoCacheRoot(repoRoot, userConfig), + runtime: { ...runtimeConfig, effectiveUvThreadpoolSize }, + cacheRuntime: getCacheRuntimeConfig(repoRoot, userConfig), + model: getModelConfig(repoRoot, userConfig), + tooling: getToolingConfig(repoRoot, userConfig), + lmdb: resolveLmdbPaths(repoRoot, userConfig), + sqlite: resolveSqlitePaths(repoRoot, userConfig) + } +}; + +if (argv.json) { + console.log(JSON.stringify(payload, null, 2)); + process.exit(0); +} + +console.log('Config dump'); +console.log(`- repo: ${repoRoot}`); +console.log(`- profile: ${payload.profile || 'none'}`); +console.log(`- cache root: ${payload.derived.cacheRoot}`); +console.log(`- repo cache: ${payload.derived.repoCacheRoot}`); +console.log(`- runtime UV_THREADPOOL_SIZE: ${payload.derived.runtime.effectiveUvThreadpoolSize ?? 'default'}`); +console.log(`- model: ${payload.derived.model.id}`); +console.log(`- lmdb code: ${payload.derived.lmdb.codePath}`); +console.log(`- lmdb prose: ${payload.derived.lmdb.prosePath}`); +console.log(`- sqlite code: ${payload.derived.sqlite.codePath}`); +console.log(`- sqlite prose: ${payload.derived.sqlite.prosePath}`); +console.log(`- env overrides: ${Object.entries(envConfig).filter(([, value]) => value !== '' && value != null).map(([key]) => key).join(', ') || 'none'}`); diff --git a/tools/config-inventory.js b/tools/config-inventory.js new file mode 100644 index 000000000..9e132d23b --- /dev/null +++ b/tools/config-inventory.js @@ -0,0 +1,523 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { fdir } from 'fdir'; +import { resolveToolRoot } from './dict-utils.js'; + +const root = resolveToolRoot(); +const schemaPath = path.join(root, 'docs', 'config-schema.json'); +const outputJsonPath = path.join(root, 'docs', 'config-inventory.json'); +const outputMdPath = path.join(root, 'docs', 'config-inventory.md'); + +const normalizeType = (schema) => { + if (!schema || typeof schema !== 'object') return null; + if (Array.isArray(schema.type)) return schema.type.join('|'); + if (typeof schema.type === 'string') return schema.type; + if (Array.isArray(schema.enum)) return 'enum'; + return null; +}; + +const normalizeEnum = (schema) => { + if (!schema || typeof schema !== 'object') return null; + if (!Array.isArray(schema.enum)) return null; + return schema.enum.map((value) => String(value)); +}; + +const mergeEntry = (target, incoming) => { + if (!target.type && incoming.type) target.type = incoming.type; + if (!target.enum && incoming.enum) target.enum = incoming.enum; + if (target.type && incoming.type && target.type !== incoming.type) { + const parts = new Set(String(target.type).split('|')); + String(incoming.type).split('|').forEach((part) => parts.add(part)); + target.type = Array.from(parts).join('|'); + } + if (target.enum && incoming.enum) { + const merged = new Set(target.enum); + incoming.enum.forEach((value) => merged.add(value)); + target.enum = Array.from(merged); + } +}; + +const collectSchemaEntries = (schema, prefix = '', entries = []) => { + if (!schema || typeof schema !== 'object') return entries; + const properties = schema.properties && typeof schema.properties === 'object' + ? 
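+// Walks properties, additionalProperties ('*' paths), and items ('[]' paths) so nested schema keys land in the inventory.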
schema.properties + : null; + if (properties) { + for (const [key, child] of Object.entries(properties)) { + const pathKey = prefix ? `${prefix}.${key}` : key; + entries.push({ + path: pathKey, + type: normalizeType(child), + enum: normalizeEnum(child) + }); + collectSchemaEntries(child, pathKey, entries); + } + } + const additional = schema.additionalProperties && typeof schema.additionalProperties === 'object' + ? schema.additionalProperties + : null; + if (additional && additional.properties) { + const pathKey = prefix ? `${prefix}.*` : '*'; + entries.push({ + path: pathKey, + type: normalizeType(additional), + enum: normalizeEnum(additional) + }); + collectSchemaEntries(additional, pathKey, entries); + } + const items = schema.items && typeof schema.items === 'object' ? schema.items : null; + if (items && items.properties) { + const pathKey = prefix ? `${prefix}[]` : '[]'; + entries.push({ + path: pathKey, + type: normalizeType(items), + enum: normalizeEnum(items) + }); + collectSchemaEntries(items, pathKey, entries); + } + return entries; +}; + +const listSourceFiles = async () => { + const files = await new fdir().withFullPaths().crawl(root).withPromise(); + return files.filter((filePath) => { + if (!filePath.endsWith('.js')) return false; + const normalized = filePath.replace(/\\/g, '/'); + if (normalized.includes('/node_modules/')) return false; + if (normalized.includes('/.git/')) return false; + if (normalized.includes('/benchmarks/repos/')) return false; + if (normalized.includes('/benchmarks/cache/')) return false; + return true; + }); +}; + +const findMatchingBrace = (source, startIndex) => { + let depth = 0; + let inString = null; + let escaped = false; + let inLineComment = false; + let inBlockComment = false; + for (let i = startIndex; i < source.length; i += 1) { + const ch = source[i]; + const next = source[i + 1]; + if (inLineComment) { + if (ch === '\n') inLineComment = false; + continue; + } + if (inBlockComment) { + if (ch === '*' && next === '/') { + inBlockComment = false; + i += 1; + } + continue; + } + if (inString) { + if (escaped) { + escaped = false; + continue; + } + if (ch === '\\') { + escaped = true; + continue; + } + if (ch === inString) { + inString = null; + } + continue; + } + if (ch === '/' && next === '/') { + inLineComment = true; + i += 1; + continue; + } + if (ch === '/' && next === '*') { + inBlockComment = true; + i += 1; + continue; + } + if (ch === '"' || ch === '\'' || ch === '`') { + inString = ch; + continue; + } + if (ch === '{') { + depth += 1; + continue; + } + if (ch === '}') { + depth -= 1; + if (depth === 0) return i; + } + } + return -1; +}; + +const extractOptionObjects = (source) => { + const ranges = []; + const patterns = [ + /\boptions\s*:\s*\{/g, + /\.options\s*\(\s*\{/g + ]; + for (const pattern of patterns) { + let match; + while ((match = pattern.exec(source)) !== null) { + const braceIndex = source.indexOf('{', match.index); + if (braceIndex < 0) continue; + const endIndex = findMatchingBrace(source, braceIndex); + if (endIndex < 0) continue; + ranges.push(source.slice(braceIndex, endIndex + 1)); + pattern.lastIndex = endIndex + 1; + } + } + return ranges; +}; + +const extractStringArray = (source, name) => { + const regex = new RegExp(`\\b${name}\\s*=\\s*\\[([\\s\\S]*?)\\]`, 'm'); + const match = regex.exec(source); + if (!match) return []; + const body = match[1] || ''; + const values = new Set(); + const stringRegex = /['"]([^'"\\]+)['"]/g; + let stringMatch; + while ((stringMatch = stringRegex.exec(body)) !== null) { + 
if (stringMatch[1]) values.add(stringMatch[1]); + } + return Array.from(values); +}; + +const extractTopLevelKeys = (objectText) => { + const keys = new Set(); + let i = 1; + const len = objectText.length; + const skipWhitespace = () => { + while (i < len && /\s/.test(objectText[i])) i += 1; + }; + const skipComments = () => { + while (i < len) { + if (objectText[i] === '/' && objectText[i + 1] === '/') { + i += 2; + while (i < len && objectText[i] !== '\n') i += 1; + continue; + } + if (objectText[i] === '/' && objectText[i + 1] === '*') { + i += 2; + while (i < len && !(objectText[i] === '*' && objectText[i + 1] === '/')) i += 1; + i += 2; + continue; + } + break; + } + }; + const parseString = (quote) => { + let value = ''; + i += 1; + while (i < len) { + const ch = objectText[i]; + if (ch === '\\') { + value += ch; + i += 2; + continue; + } + if (ch === quote) { + i += 1; + break; + } + value += ch; + i += 1; + } + return value; + }; + const parseIdentifier = () => { + const start = i; + if (!/[A-Za-z_$]/.test(objectText[i])) return null; + i += 1; + while (i < len && /[A-Za-z0-9_$]/.test(objectText[i])) i += 1; + return objectText.slice(start, i); + }; + const skipValue = () => { + let depthBrace = 0; + let depthBracket = 0; + let depthParen = 0; + let inString = null; + let escaped = false; + let inLineComment = false; + let inBlockComment = false; + for (; i < len; i += 1) { + const ch = objectText[i]; + const next = objectText[i + 1]; + if (inLineComment) { + if (ch === '\n') inLineComment = false; + continue; + } + if (inBlockComment) { + if (ch === '*' && next === '/') { + inBlockComment = false; + i += 1; + } + continue; + } + if (inString) { + if (escaped) { + escaped = false; + continue; + } + if (ch === '\\') { + escaped = true; + continue; + } + if (ch === inString) { + inString = null; + } + continue; + } + if (ch === '/' && next === '/') { + inLineComment = true; + i += 1; + continue; + } + if (ch === '/' && next === '*') { + inBlockComment = true; + i += 1; + continue; + } + if (ch === '"' || ch === '\'' || ch === '`') { + inString = ch; + continue; + } + if (ch === '{') { + depthBrace += 1; + continue; + } + if (ch === '}') { + if (depthBrace > 0) { + depthBrace -= 1; + continue; + } + return; + } + if (ch === '[') { + depthBracket += 1; + continue; + } + if (ch === ']') { + if (depthBracket > 0) depthBracket -= 1; + continue; + } + if (ch === '(') { + depthParen += 1; + continue; + } + if (ch === ')') { + if (depthParen > 0) depthParen -= 1; + continue; + } + if (depthBrace === 0 && depthBracket === 0 && depthParen === 0 && ch === ',') { + i += 1; + return; + } + } + }; + + while (i < len - 1) { + skipWhitespace(); + skipComments(); + skipWhitespace(); + if (objectText[i] === '}') break; + let key = null; + if (objectText[i] === '"' || objectText[i] === '\'') { + key = parseString(objectText[i]); + } else { + key = parseIdentifier(); + } + skipWhitespace(); + skipComments(); + skipWhitespace(); + if (!key || objectText[i] !== ':') { + i += 1; + continue; + } + keys.add(key); + i += 1; + skipValue(); + } + return Array.from(keys); +}; + +const buildInventory = async () => { + const schemaRaw = await fs.readFile(schemaPath, 'utf8'); + const schema = JSON.parse(schemaRaw); + const entries = collectSchemaEntries(schema); + const entryMap = new Map(); + for (const entry of entries) { + if (!entry.path) continue; + const existing = entryMap.get(entry.path); + if (!existing) { + entryMap.set(entry.path, { ...entry }); + } else { + mergeEntry(existing, entry); + } + } + const 
configEntries = Array.from(entryMap.values()) + .sort((a, b) => a.path.localeCompare(b.path)); + const topLevel = new Map(); + for (const entry of configEntries) { + const rootKey = entry.path.split(/[.[\]]/)[0] || entry.path; + topLevel.set(rootKey, (topLevel.get(rootKey) || 0) + 1); + } + + const sourceFiles = await listSourceFiles(); + const envVarMap = new Map(); + const cliFlagMap = new Map(); + const cliFlagsByFile = new Map(); + const dynamicOptionFiles = new Set(); + + for (const filePath of sourceFiles) { + const relPath = path.relative(root, filePath).replace(/\\/g, '/'); + const source = await fs.readFile(filePath, 'utf8'); + + const envMatches = source.match(/PAIROFCLEATS_[A-Z0-9_]+/g) || []; + for (const match of envMatches) { + if (!envVarMap.has(match)) envVarMap.set(match, new Set()); + envVarMap.get(match).add(relPath); + } + + const optionObjects = extractOptionObjects(source); + const fileFlags = new Set(); + for (const obj of optionObjects) { + extractTopLevelKeys(obj).forEach((key) => fileFlags.add(key)); + } + const boolFlags = extractStringArray(source, 'BOOLEAN_FLAGS'); + const stringFlags = extractStringArray(source, 'STRING_FLAGS'); + boolFlags.forEach((flag) => fileFlags.add(flag)); + stringFlags.forEach((flag) => fileFlags.add(flag)); + if (source.includes('mergedOptions.profile')) fileFlags.add('profile'); + + if ((source.includes('.options(') || source.includes('options:')) && fileFlags.size === 0) { + dynamicOptionFiles.add(relPath); + } + + if (fileFlags.size) { + const sorted = Array.from(fileFlags).sort((a, b) => a.localeCompare(b)); + cliFlagsByFile.set(relPath, sorted); + for (const flag of sorted) { + if (!cliFlagMap.has(flag)) cliFlagMap.set(flag, new Set()); + cliFlagMap.get(flag).add(relPath); + } + } + } + + const envVars = Array.from(envVarMap.entries()) + .map(([name, files]) => ({ name, files: Array.from(files).sort() })) + .sort((a, b) => a.name.localeCompare(b.name)); + + const cliFlags = Array.from(cliFlagMap.entries()) + .map(([flag, files]) => ({ flag, files: Array.from(files).sort() })) + .sort((a, b) => a.flag.localeCompare(b.flag)); + + const cliFlagsByFileOutput = Array.from(cliFlagsByFile.entries()) + .map(([file, flags]) => ({ file, flags })) + .sort((a, b) => a.file.localeCompare(b.file)); + + const duplicatedFlags = cliFlags + .filter((entry) => entry.files.length > 1) + .map((entry) => ({ + flag: entry.flag, + count: entry.files.length, + files: entry.files + })) + .sort((a, b) => b.count - a.count || a.flag.localeCompare(b.flag)); + + const inventory = { + generatedAt: new Date().toISOString(), + configSchema: { + path: path.relative(root, schemaPath).replace(/\\/g, '/'), + totalKeys: configEntries.length, + topLevel: Array.from(topLevel.entries()) + .map(([key, count]) => ({ key, count })) + .sort((a, b) => a.key.localeCompare(b.key)) + }, + configKeys: configEntries, + envVars, + cliFlags: { + totalFlags: cliFlags.length, + byFile: cliFlagsByFileOutput, + duplicated: duplicatedFlags, + dynamicOptionFiles: Array.from(dynamicOptionFiles).sort() + } + }; + + await fs.writeFile(outputJsonPath, JSON.stringify(inventory, null, 2)); + + const mdLines = []; + mdLines.push('# Config Inventory'); + mdLines.push(''); + mdLines.push(`Generated: ${inventory.generatedAt}`); + mdLines.push(''); + mdLines.push('This file is generated by `node tools/config-inventory.js`.'); + mdLines.push('See `docs/config-inventory-notes.md` for ownership and overlap analysis.'); + mdLines.push(''); + mdLines.push('## Summary'); + mdLines.push(`- Config keys: 
${inventory.configSchema.totalKeys}`); + mdLines.push(`- Env vars: ${inventory.envVars.length}`); + mdLines.push(`- CLI flags: ${inventory.cliFlags.totalFlags}`); + mdLines.push(''); + mdLines.push('## Config keys by top-level namespace'); + mdLines.push(''); + for (const entry of inventory.configSchema.topLevel) { + mdLines.push(`- ${entry.key}: ${entry.count}`); + } + mdLines.push(''); + mdLines.push('## Env vars'); + mdLines.push(''); + if (inventory.envVars.length === 0) { + mdLines.push('- (none)'); + } else { + for (const entry of inventory.envVars) { + mdLines.push(`- ${entry.name} (${entry.files.length} files)`); + } + } + mdLines.push(''); + mdLines.push('## CLI flags (duplicated across files)'); + mdLines.push(''); + if (inventory.cliFlags.duplicated.length === 0) { + mdLines.push('- (none)'); + } else { + for (const entry of inventory.cliFlags.duplicated) { + mdLines.push(`- ${entry.flag} (${entry.count} files)`); + } + } + mdLines.push(''); + mdLines.push('## CLI flags by file'); + mdLines.push(''); + for (const entry of inventory.cliFlags.byFile) { + mdLines.push(`### ${entry.file}`); + mdLines.push(''); + mdLines.push(entry.flags.length ? entry.flags.join(', ') : '(none)'); + mdLines.push(''); + } + mdLines.push('## Config keys (full list)'); + mdLines.push(''); + mdLines.push('```'); + for (const entry of inventory.configKeys) { + const type = entry.type ? ` (${entry.type})` : ''; + const enumValues = entry.enum && entry.enum.length ? ` enum=${entry.enum.join('|')}` : ''; + mdLines.push(`${entry.path}${type}${enumValues}`.trim()); + } + mdLines.push('```'); + mdLines.push(''); + if (inventory.cliFlags.dynamicOptionFiles.length) { + mdLines.push('## Notes'); + mdLines.push(''); + mdLines.push('Dynamic CLI options detected in these files; verify flags manually:'); + mdLines.push(''); + for (const file of inventory.cliFlags.dynamicOptionFiles) { + mdLines.push(`- ${file}`); + } + mdLines.push(''); + } + + await fs.writeFile(outputMdPath, mdLines.join('\n')); +}; + +await buildInventory(); diff --git a/tools/ctags-ingest.js b/tools/ctags-ingest.js new file mode 100644 index 000000000..a4cc02d67 --- /dev/null +++ b/tools/ctags-ingest.js @@ -0,0 +1,176 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import readline from 'node:readline'; +import { spawn } from 'node:child_process'; +import { createCli } from '../src/shared/cli.js'; +import { getRepoCacheRoot, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'ctags-ingest', + options: { + repo: { type: 'string' }, + input: { type: 'string' }, + out: { type: 'string' }, + json: { type: 'boolean', default: false }, + run: { type: 'boolean', default: false }, + interactive: { type: 'boolean', default: false }, + ctags: { type: 'string', default: 'ctags' }, + fields: { type: 'string' }, + args: { type: 'string' } + } +}).parse(); + +const repoRoot = argv.repo ? path.resolve(argv.repo) : resolveRepoRoot(process.cwd()); +const userConfig = loadUserConfig(repoRoot); +const cacheRoot = getRepoCacheRoot(repoRoot, userConfig); +const outputPath = argv.out + ? path.resolve(argv.out) + : path.join(cacheRoot, 'ctags', 'ctags.jsonl'); +const metaPath = `${outputPath}.meta.json`; +const inputPath = argv.input ? 
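+// Input precedence mirrors the dispatch below: --interactive reads stdin, --input - also means stdin, otherwise --run (or no input) shells out to ctags.
+// Example pipeline (illustrative): ctags --output-format=json --recurse=yes . | node tools/ctags-ingest.js --interactive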
String(argv.input) : null; +const runCtags = argv.run === true; +const interactive = argv.interactive === true; +const ctagsCmd = argv.ctags || 'ctags'; + +const toPosix = (value) => value.replace(/\\/g, '/'); +const normalizePath = (value) => { + if (!value) return null; + const raw = String(value); + const resolved = path.isAbsolute(raw) ? raw : path.resolve(repoRoot, raw); + const rel = path.relative(repoRoot, resolved); + return toPosix(rel || raw); +}; + +const mapEntry = (entry) => { + if (!entry || typeof entry !== 'object') return null; + if (entry._type && entry._type !== 'tag') return null; + const name = entry.name || null; + const file = normalizePath(entry.path || entry.file || entry.input || ''); + if (!name || !file) return null; + const ext = path.extname(file).toLowerCase(); + const kind = entry.kind || null; + const kindName = entry.kindName || null; + const signature = entry.signature || entry.pattern || null; + const line = Number.isFinite(Number(entry.line)) ? Number(entry.line) : null; + const startLine = line; + const endLine = line; + return { + file, + ext, + name, + kind, + kindName, + signature, + startLine, + endLine, + scope: entry.scope || null, + scopeKind: entry.scopeKind || null, + access: entry.access || null, + implementation: entry.implementation || null, + language: entry.language || null, + typeref: entry.typeref || null + }; +}; + +const stats = { + entries: 0, + ignored: 0, + errors: 0, + kinds: {}, + languages: {} +}; + +const bump = (bucket, key) => { + if (!key) return; + const k = String(key); + bucket[k] = (bucket[k] || 0) + 1; +}; + +const ensureOutputDir = async () => { + await fsPromises.mkdir(path.dirname(outputPath), { recursive: true }); +}; + +// The write stream opens its file eagerly, before the async ensureOutputDir() call below runs, +// so create the directory synchronously here to avoid an ENOENT on a fresh cache. +fs.mkdirSync(path.dirname(outputPath), { recursive: true }); +const writeStream = fs.createWriteStream(outputPath, { encoding: 'utf8' }); + +const ingestStream = async (stream) => { + const rl = readline.createInterface({ input: stream, crlfDelay: Infinity }); + for await (const line of rl) { + const trimmed = line.trim(); + if (!trimmed) continue; + let parsed = null; + try { + parsed = JSON.parse(trimmed); + } catch { + stats.errors += 1; + continue; + } + const mapped = mapEntry(parsed); + if (!mapped) { + stats.ignored += 1; + continue; + } + stats.entries += 1; + bump(stats.kinds, mapped.kind || mapped.kindName || 'unknown'); + bump(stats.languages, mapped.language || 'unknown'); + writeStream.write(`${JSON.stringify(mapped)}\n`); + } +}; + +const runCtagsCommand = async () => { + const args = ['--output-format=json', '--tag-relative=yes', '--recurse=yes']; + if (argv.fields) args.push(`--fields=${argv.fields}`); + if (argv.args) { + const extra = String(argv.args) + .split(/\s+/) + .map((entry) => entry.trim()) + .filter(Boolean); + args.push(...extra); + } + args.push(repoRoot); + const child = spawn(ctagsCmd, args, { stdio: ['ignore', 'pipe', 'pipe'] }); + child.stderr.on('data', (chunk) => process.stderr.write(chunk)); + await ingestStream(child.stdout); + const exitCode = await new Promise((resolve) => { + child.on('close', (code) => resolve(code ?? 
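+// A close event without a code counts as success; a non-zero code aborts the ingest below.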
0)); + }); + if (exitCode !== 0) { + throw new Error(`ctags exited with code ${exitCode}`); + } +}; + +await ensureOutputDir(); +if (interactive) { + await ingestStream(process.stdin); +} else if (inputPath && inputPath !== '-') { + const inputStream = fs.createReadStream(inputPath, { encoding: 'utf8' }); + await ingestStream(inputStream); +} else if (inputPath === '-' || runCtags) { + if (runCtags) { + await runCtagsCommand(); + } else { + await ingestStream(process.stdin); + } +} else { + await runCtagsCommand(); +} + +writeStream.end(); + +const summary = { + generatedAt: new Date().toISOString(), + repoRoot: path.resolve(repoRoot), + input: inputPath || (runCtags ? 'ctags' : 'stdin'), + output: path.resolve(outputPath), + stats +}; +await fsPromises.writeFile(metaPath, JSON.stringify(summary, null, 2)); + +if (argv.json) { + console.log(JSON.stringify(summary, null, 2)); +} else { + console.log(`Ctags ingest: ${stats.entries} entries (${stats.errors} parse errors)`); + console.log(`- output: ${outputPath}`); + console.log(`- meta: ${metaPath}`); +} diff --git a/tools/default-config-template.js b/tools/default-config-template.js new file mode 100644 index 000000000..77c32c329 --- /dev/null +++ b/tools/default-config-template.js @@ -0,0 +1,143 @@ +export const DEFAULT_USER_CONFIG_TEMPLATE = `{ + // Enable sqlite index artifacts for search backends. + // Speed impact: adds sqlite build time when stage4 runs. + "sqlite": { + // Toggle sqlite index usage/artifact generation. + // Speed impact: enabling adds some indexing time and disk usage. + "use": true + }, + // Enable LMDB artifacts for embeddings/cache backends. + // Speed impact: adds LMDB build time and disk usage during indexing. + "lmdb": { + // Toggle LMDB index usage/artifact generation. + // Speed impact: enabling adds some indexing time and disk usage. + "use": true + }, + // Search defaults for query-time behavior. + // Speed impact: no direct impact on indexing speed. + "search": { + // Prefer ANN search by default when multiple backends exist. + // Speed impact: no impact on indexing; affects query latency/recall. + "annDefault": true, + // Dense vector combination strategy for search. + // Speed impact: minor impact on embedding/storage cost during indexing. + "denseVectorMode": "merged", + // Regex search guardrails. + // Speed impact: no impact on indexing; affects regex query cost. + "regex": { + // Max regex pattern length accepted. + // Speed impact: no impact on indexing; caps regex compile cost. + "maxPatternLength": 512, + // Max regex input length scanned. + // Speed impact: no impact on indexing; caps regex runtime cost. + "maxInputLength": 10000, + // Max regex program size after compilation. + // Speed impact: no impact on indexing; caps regex execution cost. + "maxProgramSize": 2000, + // Regex timeout in milliseconds. + // Speed impact: no impact on indexing; limits regex runtime. + "timeoutMs": 25, + // Regex flags to apply by default. + // Speed impact: no impact on indexing; affects regex behavior. + "flags": "" + } + }, + // Index build pipeline options. + // Speed impact: many flags here change CPU/IO per file. + "indexing": { + // Sparse postings generation settings. + // Speed impact: heavier postings settings increase indexing time/size. + "postings": { + // Build phrase n-gram postings. + // Speed impact: increases indexing time and index size. + "enablePhraseNgrams": true, + // Smallest phrase n-gram length. + // Speed impact: lower values add more n-grams and cost. 
+ "phraseMinN": 2, + // Largest phrase n-gram length. + // Speed impact: higher values increase indexing time and size. + "phraseMaxN": 4, + // Build chargram postings for fuzzy matching. + // Speed impact: noticeable extra CPU and disk usage. + "enableChargrams": true, + // Smallest chargram length. + // Speed impact: lower values increase chargram volume and cost. + "chargramMinN": 3, + // Largest chargram length. + // Speed impact: higher values increase chargram volume and cost. + "chargramMaxN": 5, + // Choose which fields contribute chargrams. + // Speed impact: more fields increase indexing work. + "chargramSource": "fields", + // Cap token length eligible for chargrams. + // Speed impact: higher caps increase CPU on long identifiers. + "chargramMaxTokenLength": 48, + // Track postings per field (name, path, body, etc). + // Speed impact: slight overhead for richer scoring. + "fielded": true + }, + // When to scan imports ("pre" or "post" indexing). + // Speed impact: small; "post" avoids extra upfront work. + "importScan": "post", + // Enable AST dataflow analysis. + // Speed impact: moderate CPU cost on large codebases. + "astDataflow": true, + // Enable control-flow analysis. + // Speed impact: moderate CPU cost on large codebases. + "controlFlow": true, + // Enable risk analysis rules. + // Speed impact: moderate CPU cost; can be heavy on huge repos. + "riskAnalysis": true, + // Enable cross-file risk correlation. + // Speed impact: heavy extra work on large repos. + "riskAnalysisCrossFile": true, + // Risk regex guardrails for analysis. + // Speed impact: tighter caps can reduce analysis time. + "riskRegex": { + // Max regex pattern length accepted. + // Speed impact: lower caps reduce risk regex compile time. + "maxPatternLength": 512, + // Max regex input length scanned. + // Speed impact: lower caps reduce risk regex runtime cost. + "maxInputLength": 10000, + // Max regex program size after compilation. + // Speed impact: lower caps reduce risk regex execution cost. + "maxProgramSize": 2000, + // Regex timeout in milliseconds. + // Speed impact: lower timeouts reduce risk regex runtime cost. + "timeoutMs": 25, + // Regex flags to apply by default. + // Speed impact: minimal; affects risk regex behavior. + "flags": "i" + }, + // Enable type inference. + // Speed impact: moderate to heavy CPU cost. + "typeInference": false, + // Enable cross-file type inference. + // Speed impact: heavy extra work on large repos. + "typeInferenceCrossFile": false, + // Collect git blame/churn metadata per file. + // Speed impact: heavy IO/CPU; can dominate indexing time. + "gitBlame": true, + // Run linting pass for diagnostics. + // Speed impact: extra CPU per file. + "lint": true, + // Compute complexity metrics. + // Speed impact: extra CPU per file. + "complexity": true, + // Python AST parsing options. + // Speed impact: small to moderate CPU on Python files. + "pythonAst": { + // Enable Python AST parsing. + // Speed impact: small to moderate on Python-heavy repos. + "enabled": true + }, + // Tree-sitter parsing options. + // Speed impact: moderate CPU, improved chunking accuracy. + "treeSitter": { + // Enable tree-sitter parsing. + // Speed impact: moderate CPU on supported languages. 
+ "enabled": true + } + } +}`; diff --git a/tools/default-config.js b/tools/default-config.js new file mode 100644 index 000000000..ffc1c5434 --- /dev/null +++ b/tools/default-config.js @@ -0,0 +1,51 @@ +export const DEFAULT_USER_CONFIG = { + sqlite: { + use: true + }, + lmdb: { + use: true + }, + search: { + annDefault: true, + denseVectorMode: 'merged', + regex: { + maxPatternLength: 512, + maxInputLength: 10000, + maxProgramSize: 2000, + timeoutMs: 25, + flags: '' + } + }, + indexing: { + postings: { + enablePhraseNgrams: true, + phraseMinN: 2, + phraseMaxN: 4, + enableChargrams: true, + chargramMinN: 3, + chargramMaxN: 5, + chargramSource: 'fields', + chargramMaxTokenLength: 48, + fielded: true + }, + importScan: 'post', + astDataflow: true, + controlFlow: true, + riskAnalysis: true, + riskAnalysisCrossFile: true, + riskRegex: { + maxPatternLength: 512, + maxInputLength: 10000, + maxProgramSize: 2000, + timeoutMs: 25, + flags: 'i' + }, + typeInference: false, + typeInferenceCrossFile: false, + gitBlame: true, + lint: true, + complexity: true, + pythonAst: { enabled: true }, + treeSitter: { enabled: true } + } +}; diff --git a/tools/dict-utils.js b/tools/dict-utils.js index 65420710e..4f72e0b1f 100644 --- a/tools/dict-utils.js +++ b/tools/dict-utils.js @@ -4,6 +4,23 @@ import path from 'node:path'; import os from 'node:os'; import crypto from 'node:crypto'; import { spawnSync } from 'node:child_process'; +import { fileURLToPath } from 'node:url'; +import { DEFAULT_CACHE_MB, DEFAULT_CACHE_TTL_MS } from '../src/shared/cache.js'; +import { readJsoncFile } from '../src/shared/jsonc.js'; +import { isPlainObject, mergeConfig } from '../src/shared/config.js'; +import { getEnvConfig } from '../src/shared/env.js'; +import { stableStringify } from '../src/shared/stable-json.js'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const TOOL_ROOT = path.resolve(__dirname, '..'); +const PROFILES_DIR = path.resolve(TOOL_ROOT, 'profiles'); +const profileWarnings = new Set(); +let toolVersionCache = null; +const DEFAULT_DP_MAX_BY_FILE_COUNT = [ + { maxFiles: 5000, dpMaxTokenLength: 32 }, + { maxFiles: 20000, dpMaxTokenLength: 24 }, + { maxFiles: Number.POSITIVE_INFINITY, dpMaxTokenLength: 16 } +]; export const DEFAULT_MODEL_ID = 'Xenova/all-MiniLM-L12-v2'; export const DEFAULT_TRIAGE_PROMOTE_FIELDS = [ @@ -24,26 +41,131 @@ export const DEFAULT_TRIAGE_PROMOTE_FIELDS = [ ]; /** - * Load repo-local configuration from .pairofcleats.json. + * Load repo-local configuration from .pairofcleats.json and apply profiles. * @param {string} repoRoot + * @param {{profile?:string,fallbackRoot?:string,fallbackConfigPath?:string}} [options] * @returns {object} */ -export function loadUserConfig(repoRoot) { +export function loadUserConfig(repoRoot, options = {}) { try { const configPath = path.join(repoRoot, '.pairofcleats.json'); - if (!fs.existsSync(configPath)) return {}; - return JSON.parse(fs.readFileSync(configPath, 'utf8')) || {}; + if (fs.existsSync(configPath)) { + const base = readJsoncFile(configPath) || {}; + return normalizeUserConfig(applyProfileConfig(base, options.profile)); + } + const fallbackPath = options.fallbackConfigPath + || (options.fallbackRoot ? 
path.join(options.fallbackRoot, '.pairofcleats.json') : null); + if (fallbackPath && fs.existsSync(fallbackPath)) { + const base = readJsoncFile(fallbackPath) || {}; + return normalizeUserConfig(applyProfileConfig(base, options.profile)); + } + const defaultPath = path.join(TOOL_ROOT, '.pairofcleats.json'); + if (defaultPath !== configPath && fs.existsSync(defaultPath)) { + const base = readJsoncFile(defaultPath) || {}; + return normalizeUserConfig(applyProfileConfig(base, options.profile)); + } + return normalizeUserConfig(applyProfileConfig({}, options.profile)); } catch { return {}; } } +/** + * Resolve the installation root for PairOfCleats tooling. + * @returns {string} + */ +export function resolveToolRoot() { + return TOOL_ROOT; +} + +/** + * Resolve the current tool version from package.json. + * @returns {string|null} + */ +export function getToolVersion() { + if (toolVersionCache !== null) return toolVersionCache; + try { + const pkgPath = path.join(TOOL_ROOT, 'package.json'); + const parsed = JSON.parse(fs.readFileSync(pkgPath, 'utf8')); + toolVersionCache = typeof parsed?.version === 'string' ? parsed.version : null; + } catch { + toolVersionCache = null; + } + return toolVersionCache; +} + +/** + * Compute a stable hash of the effective config inputs for a repo. + * @param {string} repoRoot + * @param {object|null} userConfig + * @returns {string} + */ +export function getEffectiveConfigHash(repoRoot, userConfig = null) { + const cfg = userConfig || loadUserConfig(repoRoot); + const env = getEnvConfig(); + const payload = { config: cfg, env }; + const json = stableStringify(payload); + return crypto.createHash('sha1').update(json).digest('hex'); +} + + +function normalizeUserConfig(baseConfig) { + if (!isPlainObject(baseConfig)) return baseConfig || {}; + + return baseConfig; +} + + +function loadProfileConfig(profileName) { + if (!profileName) return { config: {}, path: null, error: null }; + const profileFile = `${profileName}.json`; + const profilePath = path.join(PROFILES_DIR, profileFile); + if (!fs.existsSync(profilePath)) { + return { + config: {}, + path: profilePath, + error: `Profile not found: ${profilePath}` + }; + } + try { + const config = JSON.parse(fs.readFileSync(profilePath, 'utf8')) || {}; + if (isPlainObject(config)) delete config.profile; + return { config, path: profilePath, error: null }; + } catch (error) { + return { + config: {}, + path: profilePath, + error: `Failed to parse profile ${profilePath}: ${error?.message || error}` + }; + } +} + +function applyProfileConfig(baseConfig, profileOverride) { + const overrideName = typeof profileOverride === 'string' ? profileOverride.trim() : ''; + const envProfile = getEnvConfig().profile || ''; + const configProfile = typeof baseConfig?.profile === 'string' ? baseConfig.profile.trim() : ''; + const profileName = overrideName || envProfile || configProfile; + if (!profileName) return baseConfig || {}; + const { config: profileConfig, path: profilePath, error } = loadProfileConfig(profileName); + if (error) { + const key = `${profileName}:${profilePath}`; + if (!profileWarnings.has(key)) { + profileWarnings.add(key); + console.error(`[config] ${error}`); + } + } + const merged = mergeConfig(profileConfig, baseConfig || {}); + merged.profile = profileName; + return merged; +} + /** * Resolve the cache root directory. 
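 * Resolution order (mirrors the body): envConfig.home, then
 * %LOCALAPPDATA%\PairOfCleats, then $XDG_CACHE_HOME/pairofcleats,
 * then ~/.cache/pairofcleats.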
* @returns {string} */ export function getCacheRoot() { - if (process.env.PAIROFCLEATS_HOME) return process.env.PAIROFCLEATS_HOME; + const envConfig = getEnvConfig(); + if (envConfig.home) return envConfig.home; if (process.env.LOCALAPPDATA) return path.join(process.env.LOCALAPPDATA, 'PairOfCleats'); if (process.env.XDG_CACHE_HOME) return path.join(process.env.XDG_CACHE_HOME, 'pairofcleats'); return path.join(os.homedir(), '.cache', 'pairofcleats'); @@ -58,14 +180,65 @@ export function getCacheRoot() { export function getDictConfig(repoRoot, userConfig = null) { const cfg = userConfig || loadUserConfig(repoRoot); const dict = cfg.dictionary || {}; + const envConfig = getEnvConfig(); + const dpMaxTokenLengthByFileCount = normalizeDpMaxTokenLengthByFileCount( + dict.dpMaxTokenLengthByFileCount + ); return { - dir: dict.dir || process.env.PAIROFCLEATS_DICT_DIR || path.join(getCacheRoot(), 'dictionaries'), + dir: dict.dir || envConfig.dictDir || path.join(getCacheRoot(), 'dictionaries'), languages: Array.isArray(dict.languages) ? dict.languages : ['en'], files: Array.isArray(dict.files) ? dict.files : [], includeSlang: dict.includeSlang !== false, slangDirs: Array.isArray(dict.slangDirs) ? dict.slangDirs : [], slangFiles: Array.isArray(dict.slangFiles) ? dict.slangFiles : [], - enableRepoDictionary: dict.enableRepoDictionary === true + enableRepoDictionary: dict.enableRepoDictionary === true, + segmentation: typeof dict.segmentation === 'string' ? dict.segmentation : 'auto', + dpMaxTokenLength: Number.isFinite(Number(dict.dpMaxTokenLength)) + ? Number(dict.dpMaxTokenLength) + : 32, + dpMaxTokenLengthByFileCount + }; +} + +function normalizeDpMaxTokenLengthByFileCount(raw) { + if (!Array.isArray(raw) || !raw.length) { + return DEFAULT_DP_MAX_BY_FILE_COUNT.map((entry) => ({ ...entry })); + } + const normalized = raw + .map((entry) => { + if (!entry || typeof entry !== 'object') return null; + const maxFiles = Number(entry.maxFiles); + const dpMaxTokenLength = Number(entry.dpMaxTokenLength); + if (!Number.isFinite(maxFiles) || maxFiles <= 0) return null; + if (!Number.isFinite(dpMaxTokenLength) || dpMaxTokenLength <= 0) return null; + return { + maxFiles, + dpMaxTokenLength: Math.max(4, Math.floor(dpMaxTokenLength)) + }; + }) + .filter(Boolean) + .sort((a, b) => a.maxFiles - b.maxFiles); + return normalized.length ? normalized : DEFAULT_DP_MAX_BY_FILE_COUNT.map((entry) => ({ ...entry })); +} + +export function applyAdaptiveDictConfig(dictConfig, fileCount) { + if (!dictConfig || typeof dictConfig !== 'object') return dictConfig || {}; + const count = Number(fileCount); + if (!Number.isFinite(count) || count <= 0) return dictConfig; + const mode = typeof dictConfig.segmentation === 'string' + ? dictConfig.segmentation.trim().toLowerCase() + : 'auto'; + if (mode !== 'auto' && mode !== 'dp') return dictConfig; + const thresholds = Array.isArray(dictConfig.dpMaxTokenLengthByFileCount) + && dictConfig.dpMaxTokenLengthByFileCount.length + ? 
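+  // Worked example with the default thresholds (illustrative):
+  //   fileCount  3,000 -> dpMaxTokenLength 32 (maxFiles 5000 bucket)
+  //   fileCount 12,000 -> dpMaxTokenLength 24 (maxFiles 20000 bucket)
+  //   fileCount 80,000 -> dpMaxTokenLength 16 (unbounded bucket)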
dictConfig.dpMaxTokenLengthByFileCount + : DEFAULT_DP_MAX_BY_FILE_COUNT; + const match = thresholds.find((entry) => count <= entry.maxFiles) || thresholds[thresholds.length - 1]; + if (!match || !Number.isFinite(match.dpMaxTokenLength)) return dictConfig; + if (dictConfig.dpMaxTokenLength === match.dpMaxTokenLength) return dictConfig; + return { + ...dictConfig, + dpMaxTokenLength: match.dpMaxTokenLength }; } @@ -76,9 +249,21 @@ export function getDictConfig(repoRoot, userConfig = null) { */ export function getRepoId(repoRoot) { const resolved = path.resolve(repoRoot); - return crypto.createHash('sha1').update(resolved).digest('hex'); + const base = path.basename(resolved); + const normalized = String(base || 'repo') + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, ''); + const prefix = (normalized || 'repo').slice(0, 24); + const hash = crypto.createHash('sha1').update(resolved).digest('hex').slice(0, 12); + return `${prefix}-${hash}`; } +const getLegacyRepoId = (repoRoot) => { + const resolved = path.resolve(repoRoot); + return crypto.createHash('sha1').update(resolved).digest('hex'); +}; + /** * Resolve the repo root from a starting directory. * @param {string} startPath @@ -126,9 +311,120 @@ function findConfigRoot(startPath) { */ export function getRepoCacheRoot(repoRoot, userConfig = null) { const cfg = userConfig || loadUserConfig(repoRoot); - const cacheRoot = (cfg.cache && cfg.cache.root) || process.env.PAIROFCLEATS_CACHE_ROOT || getCacheRoot(); + const envConfig = getEnvConfig(); + const cacheRoot = (cfg.cache && cfg.cache.root) || envConfig.cacheRoot || getCacheRoot(); const repoId = getRepoId(repoRoot); - return path.join(cacheRoot, 'repos', repoId); + const repoCacheRoot = path.join(cacheRoot, 'repos', repoId); + const legacyRoot = path.join(cacheRoot, 'repos', getLegacyRepoId(repoRoot)); + if (fs.existsSync(legacyRoot) && !fs.existsSync(repoCacheRoot)) return legacyRoot; + return repoCacheRoot; +} + +/** + * Resolve the builds root directory for a repo. + * @param {string} repoRoot + * @param {object|null} userConfig + * @returns {string} + */ +export function getBuildsRoot(repoRoot, userConfig = null) { + return path.join(getRepoCacheRoot(repoRoot, userConfig), 'builds'); +} + +/** + * Resolve current build metadata for a repo, if present. + * @param {string} repoRoot + * @param {object|null} userConfig + * @returns {{buildId:string,buildRoot:string,path:string,data:object}|null} + */ +export function getCurrentBuildInfo(repoRoot, userConfig = null, options = {}) { + const repoCacheRoot = getRepoCacheRoot(repoRoot, userConfig); + const buildsRoot = path.join(repoCacheRoot, 'builds'); + const currentPath = path.join(buildsRoot, 'current.json'); + if (!fs.existsSync(currentPath)) return null; + try { + const data = JSON.parse(fs.readFileSync(currentPath, 'utf8')) || {}; + const buildId = typeof data.buildId === 'string' ? data.buildId : null; + const buildRootRaw = typeof data.buildRoot === 'string' ? data.buildRoot : null; + const resolveRoot = (value) => { + if (!value) return null; + return path.isAbsolute(value) ? value : path.join(repoCacheRoot, value); + }; + const buildRoot = buildRootRaw + ? resolveRoot(buildRootRaw) + : (buildId ? 
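+    // Note: getRepoId above now yields a readable id such as "my-repo-3f2a1b9c0d4e"
+    // (sanitized basename capped at 24 chars plus the first 12 hex chars of the sha1),
+    // while getLegacyRepoId keeps the full 40-char sha1 so pre-existing cache
+    // directories under repos/<sha1> remain reachable.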
path.join(buildsRoot, buildId) : null); + const buildRoots = {}; + if (data.buildRoots && typeof data.buildRoots === 'object' && !Array.isArray(data.buildRoots)) { + for (const [mode, value] of Object.entries(data.buildRoots)) { + if (typeof value !== 'string') continue; + const resolved = resolveRoot(value); + if (resolved) buildRoots[mode] = resolved; + } + } else if (buildRoot && Array.isArray(data.modes)) { + for (const mode of data.modes) { + if (typeof mode !== 'string') continue; + buildRoots[mode] = buildRoot; + } + } + const preferredMode = typeof options.mode === 'string' ? options.mode : null; + const preferredRoot = preferredMode ? buildRoots[preferredMode] : null; + const activeRoot = preferredRoot || buildRoot || Object.values(buildRoots)[0] || null; + if (!buildId || !activeRoot || !fs.existsSync(activeRoot)) return null; + return { buildId, buildRoot: buildRoot || activeRoot, activeRoot, path: currentPath, data, buildRoots }; + } catch { + return null; + } +} + +/** + * Resolve the active index root for a repo (current build or legacy path). + * @param {string} repoRoot + * @param {object|null} userConfig + * @param {{indexRoot?:string|null}} [options] + * @returns {string} + */ +export function resolveIndexRoot(repoRoot, userConfig = null, options = {}) { + if (options?.indexRoot) return path.resolve(options.indexRoot); + const repoCacheRoot = getRepoCacheRoot(repoRoot, userConfig); + const buildsRoot = path.join(repoCacheRoot, 'builds'); + const currentPath = path.join(buildsRoot, 'current.json'); + if (fs.existsSync(currentPath)) { + try { + const data = JSON.parse(fs.readFileSync(currentPath, 'utf8')) || {}; + const resolveRoot = (value) => { + if (!value) return null; + return path.isAbsolute(value) ? value : path.join(repoCacheRoot, value); + }; + const buildRootRaw = typeof data.buildRoot === 'string' ? data.buildRoot : null; + const buildId = typeof data.buildId === 'string' ? data.buildId : null; + const buildRoot = buildRootRaw + ? resolveRoot(buildRootRaw) + : (buildId ? path.join(buildsRoot, buildId) : null); + const buildRoots = {}; + if (data.buildRoots && typeof data.buildRoots === 'object' && !Array.isArray(data.buildRoots)) { + for (const [mode, value] of Object.entries(data.buildRoots)) { + if (typeof value !== 'string') continue; + buildRoots[mode] = resolveRoot(value); + } + } else if (buildRoot && Array.isArray(data.modes)) { + for (const mode of data.modes) { + if (typeof mode !== 'string') continue; + buildRoots[mode] = buildRoot; + } + } + const preferredMode = typeof options.mode === 'string' ? options.mode : null; + const ensureExists = (value) => (value && fs.existsSync(value) ? value : null); + let resolved = preferredMode ? 
ensureExists(buildRoots[preferredMode]) : null; + if (!resolved && !preferredMode) { + for (const mode of ['code', 'prose', 'records']) { + resolved = ensureExists(buildRoots[mode]); + if (resolved) break; + } + } + if (!resolved) resolved = ensureExists(buildRoot); + if (resolved) return resolved; + } catch {} + } + return getRepoCacheRoot(repoRoot, userConfig); } /** @@ -140,13 +436,116 @@ export function getRepoCacheRoot(repoRoot, userConfig = null) { export function getModelConfig(repoRoot, userConfig = null) { const cfg = userConfig || loadUserConfig(repoRoot); const models = cfg.models || {}; - const id = process.env.PAIROFCLEATS_MODEL || models.id || DEFAULT_MODEL_ID; + const envConfig = getEnvConfig(); + const id = envConfig.model || models.id || DEFAULT_MODEL_ID; return { id, dir: getModelsDir(repoRoot, cfg) }; } +/** + * Resolve runtime configuration for a repo. + * @param {string} repoRoot + * @param {object|null} userConfig + * @returns {{maxOldSpaceMb:number|null,nodeOptions:string,uvThreadpoolSize:number|null}} + */ +export function getRuntimeConfig(repoRoot, userConfig = null) { + const cfg = userConfig || loadUserConfig(repoRoot); + const runtime = cfg.runtime || {}; + const envConfig = getEnvConfig(); + const rawMaxOldSpace = runtime.maxOldSpaceMb ?? envConfig.maxOldSpaceMb; + const parsedMaxOldSpace = Number(rawMaxOldSpace); + const maxOldSpaceMb = Number.isFinite(parsedMaxOldSpace) && parsedMaxOldSpace > 0 + ? parsedMaxOldSpace + : null; + const nodeOptionsRaw = runtime.nodeOptions ?? envConfig.nodeOptions; + const nodeOptions = typeof nodeOptionsRaw === 'string' ? nodeOptionsRaw.trim() : ''; + const rawUvThreadpoolSize = runtime.uvThreadpoolSize ?? envConfig.uvThreadpoolSize; + const parsedUvThreadpoolSize = Number(rawUvThreadpoolSize); + const uvThreadpoolSize = Number.isFinite(parsedUvThreadpoolSize) && parsedUvThreadpoolSize > 0 + ? Math.floor(parsedUvThreadpoolSize) + : null; + return { maxOldSpaceMb, nodeOptions, uvThreadpoolSize }; +} + +/** + * Resolve runtime cache limits and TTLs for a repo. + * @param {string} repoRoot + * @param {object|null} userConfig + * @returns {{fileText:{maxMb:number,ttlMs:number},summary:{maxMb:number,ttlMs:number},lint:{maxMb:number,ttlMs:number},complexity:{maxMb:number,ttlMs:number},gitMeta:{maxMb:number,ttlMs:number}}} + */ +export function getCacheRuntimeConfig(repoRoot, userConfig = null) { + const cfg = userConfig || loadUserConfig(repoRoot); + const runtimeCache = cfg.cache?.runtime || {}; + const resolveEntry = (key) => { + const entry = runtimeCache[key] || {}; + const maxMbRaw = entry.maxMb; + const ttlMsRaw = entry.ttlMs; + const maxMb = Number.isFinite(Number(maxMbRaw)) + ? Math.max(0, Number(maxMbRaw)) + : (DEFAULT_CACHE_MB[key] || 0); + const ttlMs = Number.isFinite(Number(ttlMsRaw)) + ? Math.max(0, Number(ttlMsRaw)) + : (DEFAULT_CACHE_TTL_MS[key] || 0); + return { maxMb, ttlMs }; + }; + return { + fileText: resolveEntry('fileText'), + summary: resolveEntry('summary'), + lint: resolveEntry('lint'), + complexity: resolveEntry('complexity'), + gitMeta: resolveEntry('gitMeta') + }; +} + +/** + * Merge runtime Node options with existing NODE_OPTIONS. + * @param {{maxOldSpaceMb:number|null,nodeOptions:string,uvThreadpoolSize:number|null}} runtimeConfig + * @param {string} [baseOptions] + * @returns {string} + */ +export function resolveNodeOptions(runtimeConfig, baseOptions = process.env.NODE_OPTIONS || '') { + const base = typeof baseOptions === 'string' ? 
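+  // Illustrative example: runtimeConfig = { maxOldSpaceMb: 4096, nodeOptions: '--enable-source-maps' }
+  // with NODE_OPTIONS='--trace-warnings' resolves to
+  // '--trace-warnings --enable-source-maps --max-old-space-size=4096';
+  // the heap flag is only appended when no --max-old-space-size is already present.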
baseOptions.trim() : ''; + const extras = []; + if (runtimeConfig?.nodeOptions) extras.push(runtimeConfig.nodeOptions.trim()); + if (Number.isFinite(runtimeConfig?.maxOldSpaceMb) && runtimeConfig.maxOldSpaceMb > 0) { + const combined = [base, ...extras].join(' '); + if (!combined.includes('--max-old-space-size')) { + extras.push(`--max-old-space-size=${Math.floor(runtimeConfig.maxOldSpaceMb)}`); + } + } + return [base, ...extras].filter(Boolean).join(' ').trim(); +} + + +/** + * Resolve the child-process runtime environment for PairOfCleats tool launches. + * Applies runtime Node options and (optionally) propagates UV_THREADPOOL_SIZE when configured. + * Note: UV_THREADPOOL_SIZE must be set before the Node process starts to affect libuv. + * @param {{maxOldSpaceMb:number|null,nodeOptions:string,uvThreadpoolSize:number|null}} runtimeConfig + * @param {NodeJS.ProcessEnv} [baseEnv] + * @returns {NodeJS.ProcessEnv} + */ +export function resolveRuntimeEnv(runtimeConfig, baseEnv = process.env) { + const env = { ...baseEnv }; + const resolvedNodeOptions = resolveNodeOptions(runtimeConfig, env.NODE_OPTIONS || ''); + if (resolvedNodeOptions) { + env.NODE_OPTIONS = resolvedNodeOptions; + } + + const uvThreadpoolSize = runtimeConfig?.uvThreadpoolSize; + if ( + Number.isFinite(Number(uvThreadpoolSize)) + && Number(uvThreadpoolSize) > 0 + && !env.UV_THREADPOOL_SIZE + ) { + env.UV_THREADPOOL_SIZE = String(Math.floor(Number(uvThreadpoolSize))); + } + + return env; +} + /** * Resolve the index directory for a repo/mode. * @param {string} repoRoot @@ -154,8 +553,9 @@ export function getModelConfig(repoRoot, userConfig = null) { * @param {object|null} userConfig * @returns {string} */ -export function getIndexDir(repoRoot, mode, userConfig = null) { - return path.join(getRepoCacheRoot(repoRoot, userConfig), `index-${mode}`); +export function getIndexDir(repoRoot, mode, userConfig = null, options = {}) { + const base = resolveIndexRoot(repoRoot, userConfig, { ...options, mode }); + return path.join(base, `index-${mode}`); } /** @@ -223,18 +623,40 @@ export function getRepoDictPath(repoRoot, dictConfig = null) { return path.join(config.dir, 'repos', `${repoId}.txt`); } +/** + * Resolve LMDB database paths for the repo. + * @param {string} repoRoot + * @param {object|null} userConfig + * @returns {{codePath:string,prosePath:string,dbDir:string}} + */ +export function resolveLmdbPaths(repoRoot, userConfig = null, options = {}) { + const cfg = userConfig || loadUserConfig(repoRoot); + const lmdb = cfg.lmdb || {}; + const indexRoot = resolveIndexRoot(repoRoot, cfg, options); + const defaultDir = path.join(indexRoot, 'index-lmdb'); + const dbDir = lmdb.dbDir ? resolvePath(repoRoot, lmdb.dbDir) : defaultDir; + const codePath = lmdb.codeDbPath + ? resolvePath(repoRoot, lmdb.codeDbPath) + : path.join(dbDir, 'index-code'); + const prosePath = lmdb.proseDbPath + ? resolvePath(repoRoot, lmdb.proseDbPath) + : path.join(dbDir, 'index-prose'); + return { codePath, prosePath, dbDir }; +} + /** * Resolve SQLite database paths for the repo. 
* @param {string} repoRoot * @param {object|null} userConfig * @returns {{codePath:string,prosePath:string,dbDir:string,legacyPath:string,legacyExists:boolean}} */ -export function resolveSqlitePaths(repoRoot, userConfig = null) { +export function resolveSqlitePaths(repoRoot, userConfig = null, options = {}) { const cfg = userConfig || loadUserConfig(repoRoot); const sqlite = cfg.sqlite || {}; const repoCacheRoot = getRepoCacheRoot(repoRoot, cfg); - const defaultDir = path.join(repoCacheRoot, 'index-sqlite'); - const legacyPath = sqlite.dbPath ? resolvePath(repoRoot, sqlite.dbPath) : path.join(defaultDir, 'index.db'); + const indexRoot = resolveIndexRoot(repoRoot, cfg, options); + const defaultDir = path.join(indexRoot, 'index-sqlite'); + const legacyPath = path.join(repoCacheRoot, 'index-sqlite', 'index.db'); const dbDir = sqlite.dbDir ? resolvePath(repoRoot, sqlite.dbDir) : defaultDir; const codePath = sqlite.codeDbPath ? resolvePath(repoRoot, sqlite.codeDbPath) @@ -259,9 +681,10 @@ export function resolveSqlitePaths(repoRoot, userConfig = null) { */ export function getModelsDir(repoRoot, userConfig = null) { const cfg = userConfig || loadUserConfig(repoRoot); - const cacheRoot = (cfg.cache && cfg.cache.root) || process.env.PAIROFCLEATS_CACHE_ROOT || getCacheRoot(); + const envConfig = getEnvConfig(); + const cacheRoot = (cfg.cache && cfg.cache.root) || envConfig.cacheRoot || getCacheRoot(); const models = cfg.models || {}; - return models.dir || process.env.PAIROFCLEATS_MODELS_DIR || path.join(cacheRoot, 'models'); + return models.dir || envConfig.modelsDir || path.join(cacheRoot, 'models'); } /** @@ -272,26 +695,70 @@ export function getModelsDir(repoRoot, userConfig = null) { */ export function getToolingDir(repoRoot, userConfig = null) { const cfg = userConfig || loadUserConfig(repoRoot); - const cacheRoot = (cfg.cache && cfg.cache.root) || process.env.PAIROFCLEATS_CACHE_ROOT || getCacheRoot(); + const envConfig = getEnvConfig(); + const cacheRoot = (cfg.cache && cfg.cache.root) || envConfig.cacheRoot || getCacheRoot(); const tooling = cfg.tooling || {}; - return tooling.dir || process.env.PAIROFCLEATS_TOOLING_DIR || path.join(cacheRoot, 'tooling'); + return tooling.dir || envConfig.toolingDir || path.join(cacheRoot, 'tooling'); } /** * Resolve tooling configuration for a repo. * @param {string} repoRoot * @param {object|null} userConfig - * @returns {{autoInstallOnDetect:boolean,installScope:string,allowGlobalFallback:boolean,dir:string}} + * @returns {{autoInstallOnDetect:boolean,autoEnableOnDetect:boolean,installScope:string,allowGlobalFallback:boolean,dir:string,enabledTools:string[],disabledTools:string[],typescript:{enabled:boolean,resolveOrder:string[],useTsconfig:boolean,tsconfigPath:string},clangd:{requireCompilationDatabase:boolean,compileCommandsDir:string}}} */ export function getToolingConfig(repoRoot, userConfig = null) { const cfg = userConfig || loadUserConfig(repoRoot); const tooling = cfg.tooling || {}; - const installScope = (tooling.installScope || process.env.PAIROFCLEATS_TOOLING_INSTALL_SCOPE || 'cache').toLowerCase(); + const typescript = tooling.typescript || {}; + const clangd = tooling.clangd || {}; + const envConfig = getEnvConfig(); + const timeoutMs = Number(tooling.timeoutMs ?? envConfig.toolingTimeoutMs); + const maxRetries = Number(tooling.maxRetries ?? envConfig.toolingMaxRetries); + const breakerThreshold = Number(tooling.circuitBreakerThreshold ?? envConfig.toolingCircuitBreaker); + const logDir = typeof tooling.logDir === 'string' ? 
tooling.logDir : ''; + const installScope = (tooling.installScope || envConfig.toolingInstallScope || 'cache').toLowerCase(); + const normalizeOrder = (value) => { + if (Array.isArray(value)) return value.map((entry) => String(entry).trim()).filter(Boolean); + if (typeof value === 'string') { + return value.split(',').map((entry) => entry.trim()).filter(Boolean); + } + return null; + }; + const normalizeToolList = (value) => { + if (Array.isArray(value)) { + return value.map((entry) => String(entry).trim().toLowerCase()).filter(Boolean); + } + if (typeof value === 'string') { + return value.split(',').map((entry) => entry.trim().toLowerCase()).filter(Boolean); + } + return []; + }; + const enabledTools = normalizeToolList(tooling.enabledTools); + const disabledTools = normalizeToolList(tooling.disabledTools); + const resolveOrder = normalizeOrder(typescript.resolveOrder) || ['repo', 'cache', 'global']; return { autoInstallOnDetect: tooling.autoInstallOnDetect === true, + autoEnableOnDetect: tooling.autoEnableOnDetect !== false, + timeoutMs: Number.isFinite(timeoutMs) ? Math.max(1000, Math.floor(timeoutMs)) : null, + maxRetries: Number.isFinite(maxRetries) ? Math.max(0, Math.floor(maxRetries)) : null, + circuitBreakerThreshold: Number.isFinite(breakerThreshold) ? Math.max(1, Math.floor(breakerThreshold)) : null, + logDir: logDir.trim(), installScope, allowGlobalFallback: tooling.allowGlobalFallback !== false, - dir: getToolingDir(repoRoot, cfg) + dir: getToolingDir(repoRoot, cfg), + enabledTools, + disabledTools, + typescript: { + enabled: typescript.enabled !== false, + resolveOrder, + useTsconfig: typescript.useTsconfig !== false, + tsconfigPath: typeof typescript.tsconfigPath === 'string' ? typescript.tsconfigPath : '' + }, + clangd: { + requireCompilationDatabase: clangd.requireCompilationDatabase === true, + compileCommandsDir: typeof clangd.compileCommandsDir === 'string' ? 
clangd.compileCommandsDir : '' + } }; } @@ -303,12 +770,13 @@ export function getToolingConfig(repoRoot, userConfig = null) { */ export function getExtensionsDir(repoRoot, userConfig = null) { const cfg = userConfig || loadUserConfig(repoRoot); - const cacheRoot = (cfg.cache && cfg.cache.root) || process.env.PAIROFCLEATS_CACHE_ROOT || getCacheRoot(); + const envConfig = getEnvConfig(); + const cacheRoot = (cfg.cache && cfg.cache.root) || envConfig.cacheRoot || getCacheRoot(); const extensions = cfg.extensions || {}; const sqliteVector = cfg.sqlite?.vectorExtension || {}; return extensions.dir || sqliteVector.dir - || process.env.PAIROFCLEATS_EXTENSIONS_DIR + || envConfig.extensionsDir || path.join(cacheRoot, 'extensions'); } @@ -389,6 +857,8 @@ export async function getDictionaryPaths(repoRoot, dictConfig = null) { if (config.enableRepoDictionary) { const repoDict = getRepoDictPath(repoRoot, config); if (fs.existsSync(repoDict)) paths.push(repoDict); + const legacyRepoDict = path.join(config.dir, 'repos', `${getLegacyRepoId(repoRoot)}.txt`); + if (fs.existsSync(legacyRepoDict)) paths.push(legacyRepoDict); } if (!paths.length) { diff --git a/tools/download-dicts.js b/tools/download-dicts.js index d607e310e..173660c8a 100644 --- a/tools/download-dicts.js +++ b/tools/download-dicts.js @@ -1,18 +1,27 @@ #!/usr/bin/env node import fs from 'node:fs/promises'; import fsSync from 'node:fs'; +import crypto from 'node:crypto'; import path from 'node:path'; import http from 'node:http'; import https from 'node:https'; import { URL } from 'node:url'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; +import { createError, ERROR_CODES } from '../src/shared/error-codes.js'; import { getDictConfig, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['update', 'force'], - string: ['lang', 'dir', 'url', 'repo'], - default: { update: false, force: false } -}); +const argv = createCli({ + scriptName: 'download-dicts', + options: { + update: { type: 'boolean', default: false }, + force: { type: 'boolean', default: false }, + lang: { type: 'string' }, + dir: { type: 'string' }, + url: { type: 'string', array: true }, + sha256: { type: 'string', array: true }, + repo: { type: 'string' } + } +}).parse(); const rootArg = argv.repo ? path.resolve(argv.repo) : null; const repoRoot = rootArg || resolveRepoRoot(process.cwd()); @@ -32,6 +41,80 @@ try { manifest = {}; } +const normalizeHash = (value) => { + if (!value) return null; + const trimmed = String(value).trim().toLowerCase(); + if (!trimmed) return null; + const normalized = trimmed.startsWith('sha256:') ? trimmed.slice(7) : trimmed; + if (!/^[a-f0-9]{64}$/.test(normalized)) return null; + return normalized; +}; + +const parseHashes = (input) => { + if (!input) return {}; + const items = Array.isArray(input) ? input : [input]; + const out = {}; + for (const item of items) { + const eq = String(item || '').indexOf('='); + if (eq <= 0 || eq >= item.length - 1) continue; + const name = item.slice(0, eq); + const hash = normalizeHash(item.slice(eq + 1)); + if (name && hash) out[name] = hash; + } + return out; +}; + +const resolveDownloadPolicy = (cfg) => { + const policy = cfg?.security?.downloads || {}; + const allowlist = policy.allowlist && typeof policy.allowlist === 'object' + ? 
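+  // Expected shapes (illustrative): hashes arrive either via the CLI, e.g.
+  //   node tools/download-dicts.js --lang en --sha256 en=<64-hex-digest>
+  // or via config security.downloads.allowlist keyed by source name/url/file, e.g.
+  //   { "security": { "downloads": { "requireHash": true, "allowlist": { "en": "sha256:<digest>" } } } }
+  // normalizeHash accepts bare or "sha256:"-prefixed 64-hex digests and rejects anything else.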
policy.allowlist + : {}; + return { + requireHash: policy.requireHash === true, + warnUnsigned: policy.warnUnsigned !== false, + allowlist + }; +}; + +const resolveExpectedHash = (source, policy, overrides) => { + const explicit = normalizeHash(source?.sha256 || source?.hash); + if (explicit) return explicit; + const allowlist = policy?.allowlist || {}; + const fallback = overrides?.[source?.name] + || overrides?.[source?.url] + || overrides?.[source?.file] + || allowlist[source?.name] + || allowlist[source?.url] + || allowlist[source?.file]; + return normalizeHash(fallback); +}; + +const verifyDownloadHash = (source, buffer, expectedHash, policy) => { + if (!expectedHash) { + if (policy?.requireHash) { + throw createError( + ERROR_CODES.DOWNLOAD_VERIFY_FAILED, + `Download verification requires a sha256 hash (${source?.name || source?.url || 'unknown source'}).` + ); + } + if (policy?.warnUnsigned) { + console.warn(`[download] Skipping hash verification for ${source?.name || source?.url || 'unknown source'}.`); + } + return null; + } + const actual = crypto.createHash('sha256').update(buffer).digest('hex'); + if (actual !== expectedHash) { + throw createError( + ERROR_CODES.DOWNLOAD_VERIFY_FAILED, + `Download verification failed for ${source?.name || source?.url || 'unknown source'}.` + ); + } + return actual; +}; + +const hashOverrides = parseHashes(argv.sha256); +const downloadPolicy = resolveDownloadPolicy(userConfig); + const SOURCES = { en: { name: 'en', @@ -45,14 +128,17 @@ const SOURCES = { * @param {string|string[]|null} input * @returns {Array<{name:string,url:string,file:string}>} */ -function parseUrls(input) { +function parseUrls(input, hashes = null) { if (!input) return []; const items = Array.isArray(input) ? input : [input]; const sources = []; for (const item of items) { - const [name, url] = item.split('='); - if (!name || !url) continue; - sources.push({ name, url, file: `${name}.txt` }); + const eq = item.indexOf('='); + if (eq <= 0 || eq >= item.length - 1) continue; + const name = item.slice(0, eq); + const url = item.slice(eq + 1); + const sha256 = hashes && hashes[name] ? hashes[name] : null; + sources.push({ name, url, file: `${name}.txt`, sha256 }); } return sources; } @@ -122,12 +208,17 @@ async function downloadSource(source) { throw new Error(`Failed to download ${source.url}: ${response.statusCode}`); } + const expectedHash = resolveExpectedHash(source, downloadPolicy, hashOverrides); + const actualHash = verifyDownloadHash(source, response.body, expectedHash, downloadPolicy); + const text = response.body.toString('utf8'); await fs.writeFile(outputPath, text.endsWith('\n') ? 
text : `${text}\n`); manifest[source.name] = { url: source.url, file: source.file, + sha256: actualHash || expectedHash || null, + verified: Boolean(expectedHash), etag: response.headers.etag || null, lastModified: response.headers['last-modified'] || null, downloadedAt: new Date().toISOString() @@ -146,7 +237,7 @@ for (const lang of langs) { if (src) sources.push(src); } -const urlSources = parseUrls(argv.url); +const urlSources = parseUrls(argv.url, hashOverrides); sources.push(...urlSources); if (!sources.length) { diff --git a/tools/download-extensions.js b/tools/download-extensions.js index 7868141c9..a58950144 100644 --- a/tools/download-extensions.js +++ b/tools/download-extensions.js @@ -1,22 +1,33 @@ #!/usr/bin/env node import fs from 'node:fs/promises'; import fsSync from 'node:fs'; +import crypto from 'node:crypto'; import path from 'node:path'; import http from 'node:http'; import https from 'node:https'; import { pipeline } from 'node:stream/promises'; import { URL } from 'node:url'; import { createGunzip } from 'node:zlib'; -import { spawnSync } from 'node:child_process'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; +import { createError, ERROR_CODES } from '../src/shared/error-codes.js'; import { loadUserConfig, resolveRepoRoot } from './dict-utils.js'; import { getBinarySuffix, getPlatformKey, getVectorExtensionConfig, resolveVectorExtensionPath } from './vector-extension.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['update', 'force'], - string: ['provider', 'dir', 'url', 'out', 'platform', 'arch', 'repo'], - default: { update: false, force: false } -}); +const argv = createCli({ + scriptName: 'download-extensions', + options: { + update: { type: 'boolean', default: false }, + force: { type: 'boolean', default: false }, + provider: { type: 'string' }, + dir: { type: 'string' }, + url: { type: 'string' }, + sha256: { type: 'string', array: true }, + out: { type: 'string' }, + platform: { type: 'string' }, + arch: { type: 'string' }, + repo: { type: 'string' } + } +}).parse(); const rootArg = argv.repo ? path.resolve(argv.repo) : null; const repoRoot = rootArg || resolveRepoRoot(process.cwd()); @@ -41,6 +52,105 @@ try { manifest = {}; } +const FILE_MODE = 0o644; +const DIR_MODE = 0o755; +const DEFAULT_ARCHIVE_LIMITS = { + maxBytes: 200 * 1024 * 1024, + maxEntryBytes: 50 * 1024 * 1024, + maxEntries: 2048 +}; + +const normalizeLimit = (value, fallback) => { + if (value === 0 || value === false) return null; + const parsed = Number(value); + if (Number.isFinite(parsed) && parsed > 0) return Math.floor(parsed); + return fallback; +}; + +const normalizeHash = (value) => { + if (!value) return null; + const trimmed = String(value).trim().toLowerCase(); + if (!trimmed) return null; + const normalized = trimmed.startsWith('sha256:') ? trimmed.slice(7) : trimmed; + if (!/^[a-f0-9]{64}$/.test(normalized)) return null; + return normalized; +}; + +const parseHashes = (input) => { + if (!input) return {}; + const items = Array.isArray(input) ? input : [input]; + const out = {}; + for (const item of items) { + const eq = String(item || '').indexOf('='); + if (eq <= 0 || eq >= item.length - 1) continue; + const name = item.slice(0, eq); + const hash = normalizeHash(item.slice(eq + 1)); + if (name && hash) out[name] = hash; + } + return out; +}; + +const resolveDownloadPolicy = (cfg) => { + const policy = cfg?.security?.downloads || {}; + const allowlist = policy.allowlist && typeof policy.allowlist === 'object' + ? 
policy.allowlist + : {}; + return { + requireHash: policy.requireHash === true, + warnUnsigned: policy.warnUnsigned !== false, + allowlist + }; +}; + +const resolveArchiveLimits = (cfg) => { + const archives = cfg?.security?.archives || {}; + return { + maxBytes: normalizeLimit(archives.maxBytes, DEFAULT_ARCHIVE_LIMITS.maxBytes), + maxEntryBytes: normalizeLimit(archives.maxEntryBytes, DEFAULT_ARCHIVE_LIMITS.maxEntryBytes), + maxEntries: normalizeLimit(archives.maxEntries, DEFAULT_ARCHIVE_LIMITS.maxEntries) + }; +}; + +const resolveExpectedHash = (source, policy, overrides) => { + const explicit = normalizeHash(source?.sha256 || source?.hash); + if (explicit) return explicit; + const allowlist = policy?.allowlist || {}; + const fallback = overrides?.[source?.name] + || overrides?.[source?.url] + || overrides?.[source?.file] + || allowlist[source?.name] + || allowlist[source?.url] + || allowlist[source?.file]; + return normalizeHash(fallback); +}; + +const verifyDownloadHash = (source, buffer, expectedHash, policy) => { + if (!expectedHash) { + if (policy?.requireHash) { + throw createError( + ERROR_CODES.DOWNLOAD_VERIFY_FAILED, + `Download verification requires a sha256 hash (${source?.name || source?.url || 'unknown source'}).` + ); + } + if (policy?.warnUnsigned) { + console.warn(`[download] Skipping hash verification for ${source?.name || source?.url || 'unknown source'}.`); + } + return null; + } + const actual = crypto.createHash('sha256').update(buffer).digest('hex'); + if (actual !== expectedHash) { + throw createError( + ERROR_CODES.DOWNLOAD_VERIFY_FAILED, + `Download verification failed for ${source?.name || source?.url || 'unknown source'}.` + ); + } + return actual; +}; + +const hashOverrides = parseHashes(argv.sha256); +const downloadPolicy = resolveDownloadPolicy(userConfig); +const archiveLimits = resolveArchiveLimits(userConfig); + /** * Identify the archive type from a filename or URL. * @param {string|undefined|null} value @@ -64,51 +174,203 @@ function getArchiveTypeForSource(source) { return getArchiveType(source.file) || getArchiveType(source.url); } -/** - * Run a command and return true if it succeeded. - * @param {string} cmd - * @param {string[]} args - * @returns {boolean} - */ -function runCommand(cmd, args) { - const result = spawnSync(cmd, args, { stdio: 'inherit' }); - return result.status === 0; +function normalizeArchiveEntry(entryName) { + const name = String(entryName || '').replace(/\\/g, '/').trim(); + let cleaned = name.replace(/^(\.\/)+/, ''); + cleaned = cleaned.replace(/^\/+/, ''); + // Handle Windows extended-length paths that can appear as //?/C:/... + cleaned = cleaned.replace(/^\?\//, ''); + // Strip Windows drive-letter prefixes (e.g., C:, C:/, C:\) + cleaned = cleaned.replace(/^[A-Za-z]:/, ''); + cleaned = cleaned.replace(/^\/+/, ''); + return path.posix.normalize(cleaned); } -async function extractZipNode(archivePath, destDir) { - try { - const mod = await import('adm-zip'); - const AdmZip = mod.default || mod; - const zip = new AdmZip(archivePath); - zip.extractAllTo(destDir, true); - return true; - } catch { - return false; +function isArchivePathSafe(rootDir, entryName) { + const normalized = normalizeArchiveEntry(entryName); + if (!normalized) return false; + if (normalized === '.' 
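+  // Illustrative behavior: "../../etc/passwd" and "foo/../../bar" are rejected here,
+  // while "/etc/passwd" or "C:\\evil.dll" have already been reduced to relative names
+  // by normalizeArchiveEntry and end up confined under the destination directory.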
|| normalized === '..') return false; + if (normalized.startsWith('../') || normalized.includes('/../')) return false; + if (/^[A-Za-z]:/.test(normalized)) return false; + if (path.posix.isAbsolute(normalized) || path.win32.isAbsolute(normalized)) return false; + const root = path.resolve(rootDir); + const resolved = path.resolve(root, normalized); + const rootPrefix = root.endsWith(path.sep) ? root : `${root}${path.sep}`; + if (process.platform === 'win32') { + return resolved.toLowerCase().startsWith(rootPrefix.toLowerCase()); } + return resolved.startsWith(rootPrefix); } -async function extractTarNode(archivePath, destDir, gzip) { - try { - const mod = await import('tar-fs'); - const tarFs = mod.default || mod; - await fs.mkdir(destDir, { recursive: true }); - const extract = tarFs.extract(destDir); - const source = fsSync.createReadStream(archivePath); - if (gzip) { - await pipeline(source, createGunzip(), extract); - } else { - await pipeline(source, extract); +function resolveArchivePath(rootDir, entryName) { + if (!isArchivePathSafe(rootDir, entryName)) return null; + const normalized = normalizeArchiveEntry(entryName); + return path.resolve(rootDir, normalized); +} + +function isZipSymlink(entry) { + const attr = entry?.header?.attr; + if (typeof attr !== 'number') return false; + const mode = attr >>> 16; + return (mode & 0o170000) === 0o120000; +} + +function createArchiveLimiter(limits) { + const maxEntries = Number.isFinite(limits?.maxEntries) ? limits.maxEntries : null; + const maxEntryBytes = Number.isFinite(limits?.maxEntryBytes) ? limits.maxEntryBytes : null; + const maxBytes = Number.isFinite(limits?.maxBytes) ? limits.maxBytes : null; + let entries = 0; + let totalBytes = 0; + const checkTotals = () => { + if (maxBytes && totalBytes > maxBytes) { + throw createError(ERROR_CODES.ARCHIVE_TOO_LARGE, `Archive exceeds max size (${totalBytes} > ${maxBytes}).`); + } + }; + const checkEntry = (name, size) => { + entries += 1; + if (maxEntries && entries > maxEntries) { + throw createError(ERROR_CODES.ARCHIVE_TOO_LARGE, `Archive exceeds entry limit (${entries} > ${maxEntries}).`); + } + const entryBytes = Number.isFinite(size) && size > 0 ? size : 0; + if (maxEntryBytes && entryBytes > maxEntryBytes) { + throw createError(ERROR_CODES.ARCHIVE_TOO_LARGE, `Archive entry too large (${name}).`); + } + totalBytes += entryBytes; + checkTotals(); + return entryBytes; + }; + const addBytes = (delta) => { + if (!Number.isFinite(delta) || delta <= 0) return; + totalBytes += delta; + checkTotals(); + }; + return { checkEntry, addBytes }; +} + + +async function extractZipNode(archivePath, destDir, limits) { + const mod = await import('adm-zip'); + const AdmZip = mod.default || mod; + const zip = new AdmZip(archivePath); + const entries = zip.getEntries(); + const limiter = createArchiveLimiter(limits); + await fs.mkdir(destDir, { recursive: true }); + for (const entry of entries) { + if (isZipSymlink(entry)) { + throw createError(ERROR_CODES.ARCHIVE_UNSAFE, `unsafe zip entry (symlink): ${entry.entryName}`); + } + const targetPath = resolveArchivePath(destDir, entry.entryName); + if (!targetPath) { + throw createError(ERROR_CODES.ARCHIVE_UNSAFE, `unsafe zip entry: ${entry.entryName}`); } - return true; - } catch { - return false; + const declaredSize = Number(entry?.header?.size); + const counted = limiter.checkEntry(entry.entryName, Number.isFinite(declaredSize) ? 
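+    // Default guardrails (from DEFAULT_ARCHIVE_LIMITS, illustrative): extraction aborts
+    // beyond 200 MB total, 50 MB per entry, or 2048 entries; cfg.security.archives can
+    // override each limit, and a value of 0/false disables that particular cap via normalizeLimit.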
declaredSize : 0); + if (entry.isDirectory) { + await fs.mkdir(targetPath, { recursive: true }); + try { await fs.chmod(targetPath, DIR_MODE); } catch {} + continue; + } + const data = entry.getData(); + if (limits?.maxEntryBytes && data.length > limits.maxEntryBytes) { + throw createError(ERROR_CODES.ARCHIVE_TOO_LARGE, `archive entry too large (${entry.entryName}).`); + } + if (data.length > counted) { + limiter.addBytes(data.length - counted); + } + await fs.mkdir(path.dirname(targetPath), { recursive: true }); + await fs.writeFile(targetPath, data, { mode: FILE_MODE }); + try { await fs.chmod(targetPath, FILE_MODE); } catch {} + } + return true; +} + +async function extractTarNode(archivePath, destDir, gzip, limits) { + const mod = await import('tar-stream'); + const tarStream = mod.default || mod; + const extract = tarStream.extract(); + const limiter = createArchiveLimiter(limits); + await fs.mkdir(destDir, { recursive: true }); + extract.on('entry', (header, stream, next) => { + const rawName = header?.name || ''; + const normalized = normalizeArchiveEntry(rawName); + const type = header?.type || 'file'; + + (async () => { + // Reject symlinks/hardlinks to avoid writing outside the destination or + // creating unexpected filesystem references. + if (type === 'symlink' || type === 'link') { + throw createError(ERROR_CODES.ARCHIVE_UNSAFE, `unsafe tar entry (symlink): ${rawName}`); + } + + // Skip empty / root-ish entries. + if (!normalized || normalized === '.' || normalized === '..') { + stream.resume(); + return; + } + + const targetPath = resolveArchivePath(destDir, normalized); + if (!targetPath) { + throw createError(ERROR_CODES.ARCHIVE_UNSAFE, `unsafe tar entry: ${rawName}`); + } + + if (type === 'directory') { + await fs.mkdir(targetPath, { recursive: true }); + try { await fs.chmod(targetPath, DIR_MODE); } catch {} + stream.resume(); + return; + } + + // Ignore special entries (devices, FIFOs, pax headers, etc.). + if (type !== 'file' && type !== 'contiguous-file') { + stream.resume(); + return; + } + + const declaredSize = Number(header?.size); + const counted = limiter.checkEntry( + normalized, + Number.isFinite(declaredSize) ? 
declaredSize : 0 + ); + + await fs.mkdir(path.dirname(targetPath), { recursive: true }); + + const writer = fsSync.createWriteStream(targetPath, { mode: FILE_MODE }); + let written = 0; + stream.on('data', (chunk) => { + written += chunk.length; + if (limits?.maxEntryBytes && written > limits.maxEntryBytes) { + stream.destroy( + createError(ERROR_CODES.ARCHIVE_TOO_LARGE, `archive entry too large (${normalized}).`) + ); + } + }); + + await pipeline(stream, writer); + + if (written > counted) { + limiter.addBytes(written - counted); + } + try { await fs.chmod(targetPath, FILE_MODE); } catch {} + })() + .then(() => next()) + .catch((err) => { + try { stream.resume(); } catch {} + extract.destroy(err); + }); + }); + const source = fsSync.createReadStream(archivePath); + if (gzip) { + await pipeline(source, createGunzip(), extract); + } else { + await pipeline(source, extract); } + return true; } -async function extractArchiveNode(archivePath, destDir, type) { - if (type === 'zip') return extractZipNode(archivePath, destDir); +async function extractArchiveNode(archivePath, destDir, type, limits) { + if (type === 'zip') return extractZipNode(archivePath, destDir, limits); const gzip = type === 'tar.gz'; - return extractTarNode(archivePath, destDir, gzip); + return extractTarNode(archivePath, destDir, gzip, limits); } /** @@ -118,22 +380,8 @@ async function extractArchiveNode(archivePath, destDir, type) { * @param {string} type * @returns {boolean} */ -async function extractArchive(archivePath, destDir, type) { - if (type === 'zip') { - if (runCommand('unzip', ['-o', archivePath, '-d', destDir])) return true; - if (runCommand('tar', ['-xf', archivePath, '-C', destDir])) return true; - if (process.platform === 'win32') { - const script = `Expand-Archive -LiteralPath "${archivePath}" -DestinationPath "${destDir}" -Force`; - if (runCommand('powershell', ['-NoProfile', '-Command', script])) return true; - if (runCommand('pwsh', ['-NoProfile', '-Command', script])) return true; - } - return extractArchiveNode(archivePath, destDir, type); - } - const tarArgs = type === 'tar.gz' - ? ['-xzf', archivePath, '-C', destDir] - : ['-xf', archivePath, '-C', destDir]; - if (runCommand('tar', tarArgs)) return true; - return extractArchiveNode(archivePath, destDir, type); +async function extractArchive(archivePath, destDir, type, limits) { + return extractArchiveNode(archivePath, destDir, type, limits); } /** @@ -173,15 +421,18 @@ async function findFile(rootDir, targetName, suffix) { * @param {string} suffix * @returns {Array<{name:string,url:string,file:string}>} */ -function parseUrls(input, suffix) { +function parseUrls(input, suffix, hashes = null) { if (!input) return []; const items = Array.isArray(input) ? input : [input]; const sources = []; for (const item of items) { - const [name, url] = item.split('='); - if (!name || !url) continue; + const eq = item.indexOf('='); + if (eq <= 0 || eq >= item.length - 1) continue; + const name = item.slice(0, eq); + const url = item.slice(eq + 1); const fileName = name.includes('.') ? name : `${name}${suffix}`; - sources.push({ name, url, file: fileName }); + const sha256 = hashes && hashes[name] ? 
hashes[name] : null; + sources.push({ name, url, file: fileName, sha256 }); } return sources; } @@ -200,7 +451,8 @@ function resolveSourceFromConfig(cfg) { return { name: cfg.provider, url: byPlatform.url, - file: byPlatform.file || cfg.filename + file: byPlatform.file || cfg.filename, + sha256: byPlatform.sha256 || byPlatform.hash || null }; } if (typeof byPlatform === 'string') { @@ -248,7 +500,7 @@ function requestUrl(url, headers = {}, redirects = 0) { } const suffix = getBinarySuffix(config.platform); -const sources = parseUrls(argv.url, suffix); +const sources = parseUrls(argv.url, suffix, hashOverrides); if (!sources.length) { const fallback = resolveSourceFromConfig(config); if (fallback?.url) sources.push(fallback); @@ -319,17 +571,19 @@ async function downloadSource(source, index) { if (response.statusCode !== 200) { throw new Error(`Failed to download ${source.url}: ${response.statusCode}`); } + const expectedHash = resolveExpectedHash(source, downloadPolicy, hashOverrides); + const actualHash = verifyDownloadHash(source, response.body, expectedHash, downloadPolicy); if (archiveType) { await fs.mkdir(tempRoot, { recursive: true }); } - await fs.writeFile(downloadPath, response.body); + await fs.writeFile(downloadPath, response.body, { mode: FILE_MODE }); let extractedFrom = null; if (archiveType) { const extractDir = path.join(tempRoot, `extract-${Date.now()}`); await fs.mkdir(extractDir, { recursive: true }); - const ok = await extractArchive(downloadPath, extractDir, archiveType); + const ok = await extractArchive(downloadPath, extractDir, archiveType, archiveLimits); if (!ok) { throw new Error(`Failed to extract ${downloadPath} (${archiveType})`); } @@ -338,6 +592,7 @@ async function downloadSource(source, index) { throw new Error(`No extension binary found in ${downloadPath}`); } await fs.copyFile(extractedPath, outputPath); + try { await fs.chmod(outputPath, FILE_MODE); } catch {} extractedFrom = path.relative(extensionDir, extractedPath); await fs.rm(extractDir, { recursive: true, force: true }); await fs.rm(downloadPath, { force: true }); @@ -353,6 +608,8 @@ async function downloadSource(source, index) { provider: config.provider, platform: config.platform, arch: config.arch, + sha256: actualHash || expectedHash || null, + verified: Boolean(expectedHash), etag: response.headers.etag || null, lastModified: response.headers['last-modified'] || null, downloadedAt: new Date().toISOString() diff --git a/tools/download-models.js b/tools/download-models.js index bf4d85f99..af0686197 100644 --- a/tools/download-models.js +++ b/tools/download-models.js @@ -1,16 +1,18 @@ #!/usr/bin/env node import fs from 'node:fs/promises'; import path from 'node:path'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; import { pipeline, env } from '@xenova/transformers'; import { DEFAULT_MODEL_ID, getModelConfig, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; -const argv = minimist(process.argv.slice(2), { - string: ['model', 'cache-dir', 'repo'], - default: { - model: DEFAULT_MODEL_ID +const argv = createCli({ + scriptName: 'download-models', + options: { + model: { type: 'string', default: DEFAULT_MODEL_ID }, + 'cache-dir': { type: 'string' }, + repo: { type: 'string' } } -}); +}).parse(); const rootArg = argv.repo ? 
path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); diff --git a/tools/eval/run.js b/tools/eval/run.js new file mode 100644 index 000000000..6678fda63 --- /dev/null +++ b/tools/eval/run.js @@ -0,0 +1,194 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import path from 'node:path'; +import { createCli } from '../../src/shared/cli.js'; +import { search as coreSearch } from '../../src/integrations/core/index.js'; +import { createSqliteDbCache } from '../../src/retrieval/sqlite-cache.js'; + +const argv = createCli({ + scriptName: 'eval-run', + options: { + repo: { type: 'string' }, + dataset: { type: 'string' }, + backend: { type: 'string', default: 'auto' }, + top: { type: 'number', default: 10 }, + ann: { type: 'boolean' }, + out: { type: 'string' }, + pretty: { type: 'boolean', default: false } + }, + aliases: { n: 'top' } +}).parse(); + +const root = process.cwd(); +const repoRoot = argv.repo ? path.resolve(argv.repo) : root; +const datasetPath = argv.dataset + ? path.resolve(argv.dataset) + : path.join(root, 'tests', 'fixtures', 'sample', 'eval.json'); +const backend = argv.backend ? String(argv.backend) : 'auto'; +const topN = Math.max(1, parseInt(argv.top, 10) || 10); +const annFlag = typeof argv.ann === 'boolean' ? argv.ann : null; +const ks = [1, 3, 5, 10].filter((k) => k <= Math.max(10, topN)); + +const loadDataset = () => { + const raw = fs.readFileSync(datasetPath, 'utf8'); + const data = JSON.parse(raw); + if (!Array.isArray(data)) return []; + return data; +}; + +const matchExpected = (hit, expected) => { + if (!hit) return false; + if (expected.file && hit.file !== expected.file) return false; + if (expected.name) { + const hitName = hit.name ? String(hit.name).toLowerCase() : ''; + if (!hitName.includes(String(expected.name).toLowerCase())) return false; + } + if (expected.kind) { + if (!hit.kind || String(hit.kind).toLowerCase() !== String(expected.kind).toLowerCase()) { + return false; + } + } + return true; +}; + +const computeRecallAtK = (ranks, totalRelevant, k) => { + if (!totalRelevant) return 0; + const found = ranks.filter((rank) => rank <= k).length; + return found / totalRelevant; +}; + +const computeMRR = (ranks) => { + if (!ranks.length) return 0; + return 1 / Math.min(...ranks); +}; + +const computeNDCG = (ranks, totalRelevant, k) => { + if (!totalRelevant) return 0; + const hits = ranks.filter((rank) => rank <= k).sort((a, b) => a - b); + if (!hits.length) return 0; + const dcg = hits.reduce((sum, rank) => sum + 1 / Math.log2(rank + 1), 0); + const idealCount = Math.min(totalRelevant, k); + let idcg = 0; + for (let i = 1; i <= idealCount; i += 1) { + idcg += 1 / Math.log2(i + 1); + } + return idcg ? 
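+  // Worked example (illustrative): ranks [2, 5], totalRelevant 3, k 5 =>
+  //   recall@5 = 2/3, MRR = 1/2,
+  //   DCG = 1/log2(3) + 1/log2(6) ≈ 1.018, IDCG = 1/log2(2) + 1/log2(3) + 1/log2(4) ≈ 2.131,
+  //   nDCG@5 ≈ 0.478.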
dcg / idcg : 0; +}; + +const runSearch = async (query, mode) => { + const args = ['--json-compact', '--repo', repoRoot, '-n', String(topN)]; + if (mode && mode !== 'both') args.push('--mode', mode); + if (backend && backend !== 'auto') args.push('--backend', backend); + if (annFlag === true) args.push('--ann'); + if (annFlag === false) args.push('--no-ann'); + + const payload = await coreSearch(repoRoot, { + args, + query, + emitOutput: false, + exitOnError: false, + indexCache: evalCaches.indexCache, + sqliteCache: evalCaches.sqliteCache + }); + if (mode === 'code') return payload.code || []; + if (mode === 'prose') return payload.prose || []; + return [...(payload.code || []), ...(payload.prose || [])]; +}; + +const evalCaches = { + indexCache: new Map(), + sqliteCache: createSqliteDbCache() +}; + +const cases = loadDataset(); +if (!cases.length) { + console.error(`No eval cases found at ${datasetPath}`); + process.exit(1); +} + +const results = []; +for (const entry of cases) { + const query = String(entry?.query || '').trim(); + if (!query) continue; + const mode = entry.mode || 'both'; + const silver = Array.isArray(entry.relevant) + ? entry.relevant + : (Array.isArray(entry.expect) ? entry.expect : []); + const gold = Array.isArray(entry.gold) ? entry.gold : []; + + const hits = await runSearch(query, mode); + const ranks = []; + const goldRanks = []; + hits.forEach((hit, index) => { + const rank = index + 1; + if (silver.some((exp) => matchExpected(hit, exp))) ranks.push(rank); + if (gold.some((exp) => matchExpected(hit, exp))) goldRanks.push(rank); + }); + + const metrics = { + recallAtK: Object.fromEntries(ks.map((k) => [k, computeRecallAtK(ranks, silver.length, k)])), + mrr: computeMRR(ranks), + ndcgAtK: Object.fromEntries(ks.map((k) => [k, computeNDCG(ranks, silver.length, k)])) + }; + const goldMetrics = gold.length + ? { + recallAtK: Object.fromEntries(ks.map((k) => [k, computeRecallAtK(goldRanks, gold.length, k)])), + mrr: computeMRR(goldRanks), + ndcgAtK: Object.fromEntries(ks.map((k) => [k, computeNDCG(goldRanks, gold.length, k)])) + } + : null; + + results.push({ + query, + mode, + totals: { + relevant: silver.length, + gold: gold.length, + hits: hits.length + }, + metrics, + goldMetrics + }); +} + +const aggregate = (field) => { + if (!results.length) return 0; + const sum = results.reduce((acc, entry) => acc + (entry.metrics?.[field] || 0), 0); + return sum / results.length; +}; + +const aggregateMap = (key) => { + const totals = {}; + if (!results.length) return totals; + for (const k of ks) { + const sum = results.reduce((acc, entry) => acc + (entry.metrics?.[key]?.[k] || 0), 0); + totals[k] = sum / results.length; + } + return totals; +}; + +const summary = { + cases: results.length, + recallAtK: aggregateMap('recallAtK'), + ndcgAtK: aggregateMap('ndcgAtK'), + mrr: aggregate('mrr') +}; + +const output = { + generatedAt: new Date().toISOString(), + repo: repoRoot, + dataset: datasetPath, + backend, + topN, + ann: annFlag, + ks, + summary, + results +}; + +if (argv.out) { + fs.writeFileSync(path.resolve(argv.out), JSON.stringify(output, null, 2)); +} + +const payload = argv.pretty ? 
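// Worked example for the metric helpers defined earlier in this file, with
// illustrative numbers: two of three relevant chunks are returned, at ranks 2 and 5.
//   recall@5 = 2 / 3 ≈ 0.667
//   MRR      = 1 / min(2, 5) = 0.5
//   NDCG@5   = (1/log2(3) + 1/log2(6)) / (1/log2(2) + 1/log2(3) + 1/log2(4)) ≈ 0.478
const exampleRanks = [2, 5];
const exampleRelevant = 3;
console.log(computeRecallAtK(exampleRanks, exampleRelevant, 5)); // ≈ 0.667
console.log(computeMRR(exampleRanks));                           // 0.5
console.log(computeNDCG(exampleRanks, exampleRelevant, 5));      // ≈ 0.478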
JSON.stringify(output, null, 2) : JSON.stringify(output); +console.log(payload); diff --git a/tools/eval/sample.json b/tools/eval/sample.json new file mode 100644 index 000000000..f083378d1 --- /dev/null +++ b/tools/eval/sample.json @@ -0,0 +1,19 @@ +[ + { + "query": "greet", + "mode": "code", + "relevant": [{ "file": "src/index.js", "name": "greet" }], + "gold": [{ "file": "src/index.js", "name": "greet" }] + }, + { + "query": "clamp", + "mode": "code", + "relevant": [{ "file": "src/util.js", "name": "clamp" }] + }, + { + "query": "guide", + "mode": "prose", + "relevant": [{ "file": "docs/guide.md", "name": "Guide" }], + "gold": [{ "file": "docs/guide.md", "name": "Guide" }] + } +] diff --git a/tools/generate-demo-config.js b/tools/generate-demo-config.js new file mode 100644 index 000000000..46049cf2d --- /dev/null +++ b/tools/generate-demo-config.js @@ -0,0 +1,134 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import path from 'node:path'; +import { createCli } from '../src/shared/cli.js'; +import { parseJsoncText } from '../src/shared/jsonc.js'; +import { DEFAULT_USER_CONFIG_TEMPLATE } from './default-config-template.js'; + +const argv = createCli({ + scriptName: 'generate-demo-config', + options: { + schema: { type: 'string', default: 'docs/config-schema.json' }, + out: { type: 'string', default: 'demo.pairofcleats.json' } + } +}).parse(); + +const schemaPath = path.resolve(argv.schema); +const outPath = path.resolve(argv.out); +const schemaRaw = await fs.readFile(schemaPath, 'utf8'); +const schema = JSON.parse(schemaRaw); +const templateDefaults = parseJsoncText(DEFAULT_USER_CONFIG_TEMPLATE, 'default-config-template'); + +const collectTypes = (node) => { + if (!node || typeof node !== 'object') return []; + if (Array.isArray(node.type)) return node.type; + if (typeof node.type === 'string') return [node.type]; + const options = node.oneOf || node.anyOf || []; + const nested = []; + for (const option of options) { + nested.push(...collectTypes(option)); + } + return Array.from(new Set(nested)); +}; + +const collectEnum = (node) => { + if (!node || typeof node !== 'object') return []; + if (Array.isArray(node.enum)) return node.enum.slice(); + if (node.const !== undefined) return [node.const]; + const options = node.oneOf || node.anyOf || []; + const values = []; + for (const option of options) { + values.push(...collectEnum(option)); + } + return Array.from(new Set(values)); +}; + +const resolveDefault = (node) => { + if (!node || typeof node !== 'object') return { value: null, hasDefault: false }; + if (node.default !== undefined) return { value: node.default, hasDefault: true }; + if (node.const !== undefined) return { value: node.const, hasDefault: true }; + const types = collectTypes(node); + if (types.includes('array')) return { value: [], hasDefault: false }; + return { value: null, hasDefault: false }; +}; + +const formatValue = (value) => { + return JSON.stringify(value); +}; + +const describeAcceptedValues = (node) => { + const enumValues = collectEnum(node); + if (enumValues.length) { + return `Accepted values: ${enumValues.map(formatValue).join(', ')}`; + } + const types = collectTypes(node); + if (types.includes('boolean')) { + return 'Accepted values: true, false'; + } + const itemEnums = collectEnum(node?.items); + if (itemEnums.length) { + return `Accepted values (items): ${itemEnums.map(formatValue).join(', ')}`; + } + return ''; +}; + +const describeDefault = (node, hasDefault, value, templateValue) => { + if (templateValue !== undefined) return 
`Default: ${formatValue(templateValue)}`; + if (hasDefault) return `Default: ${formatValue(value)}`; + if (node && node.default !== undefined) return `Default: ${formatValue(node.default)}`; + return ''; +}; + +const describeMax = (node) => { + if (!node || typeof node !== 'object') return ''; + if (Number.isFinite(node.maximum)) return `Max: ${node.maximum}`; + if (Number.isFinite(node.maxItems)) return `Max items: ${node.maxItems}`; + if (Number.isFinite(node.maxLength)) return `Max length: ${node.maxLength}`; + if (Number.isFinite(node.maxProperties)) return `Max properties: ${node.maxProperties}`; + return ''; +}; + +const renderProperties = (node, lines, indent, pathPrefix, templateNode) => { + const properties = node?.properties && typeof node.properties === 'object' + ? node.properties + : {}; + const keys = Object.keys(properties); + keys.forEach((key, index) => { + const prop = properties[key]; + const propPath = pathPrefix ? `${pathPrefix}.${key}` : key; + const { value, hasDefault } = resolveDefault(prop); + const templateValue = templateNode && typeof templateNode === 'object' + ? templateNode[key] + : undefined; + const types = collectTypes(prop); + const accepted = describeAcceptedValues(prop); + if (accepted) lines.push(`${indent}// ${accepted}`); + const defaultLine = describeDefault(prop, hasDefault, value, templateValue); + if (defaultLine) lines.push(`${indent}// ${defaultLine}`); + const maxLine = describeMax(prop); + if (maxLine) lines.push(`${indent}// ${maxLine}`); + + const isObject = types.includes('object') && prop?.properties && typeof prop.properties === 'object'; + const isLeafObject = types.includes('object') && !prop?.properties; + const comma = index < keys.length - 1 ? ',' : ''; + if (isObject) { + lines.push(`${indent}"${key}": {`); + renderProperties(prop, lines, `${indent} `, propPath, templateValue); + lines.push(`${indent}}${comma}`); + } else if (isLeafObject && hasDefault && typeof value === 'object') { + lines.push(`${indent}"${key}": ${JSON.stringify(value, null, 2)}${comma}`); + } else { + const outputValue = templateValue !== undefined ? templateValue : value; + lines.push(`${indent}"${key}": ${formatValue(outputValue)}${comma}`); + } + }); +}; + +const lines = []; +lines.push('{'); +renderProperties(schema, lines, ' ', '', templateDefaults); +lines.push('}'); +lines.push(''); + +await fs.writeFile(outPath, `${lines.join('\n')}\n`, 'utf8'); +console.log(`Wrote ${outPath}`); diff --git a/tools/generate-repo-dict.js b/tools/generate-repo-dict.js index faaf61b74..9732aed7e 100644 --- a/tools/generate-repo-dict.js +++ b/tools/generate-repo-dict.js @@ -3,16 +3,21 @@ import fs from 'node:fs/promises'; import fsSync from 'node:fs'; import path from 'node:path'; import { spawnSync } from 'node:child_process'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; import ignore from 'ignore'; import { getDictConfig, getRepoDictPath, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; import { splitId } from '../src/shared/tokenize.js'; -const argv = minimist(process.argv.slice(2), { - string: ['out', 'extensions', 'repo'], - boolean: ['include-prose'], - default: { 'min-count': 3, 'include-prose': false } -}); +const argv = createCli({ + scriptName: 'generate-repo-dict', + options: { + out: { type: 'string' }, + extensions: { type: 'string' }, + repo: { type: 'string' }, + 'include-prose': { type: 'boolean', default: false }, + 'min-count': { type: 'number', default: 3 } + } +}).parse(); const rootArg = argv.repo ? 
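// Quick check of the renderProperties walker above with a one-property schema.
// The property name "use" and its template default are assumptions for the sketch;
// the real inputs come from docs/config-schema.json and the default-config template.
// For a boolean with a template default of true the emitted JSONC lines are:
//   // Accepted values: true, false
//   // Default: true
//   "use": true
const demoLines = [];
renderProperties(
  { properties: { use: { type: 'boolean', default: true } } },
  demoLines, '  ', '', { use: true }
);
console.log(demoLines.join('\n'));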
path.resolve(argv.repo) : null; const repoRoot = rootArg || resolveRepoRoot(process.cwd()); diff --git a/tools/get-last-failure.js b/tools/get-last-failure.js new file mode 100644 index 000000000..85df1df07 --- /dev/null +++ b/tools/get-last-failure.js @@ -0,0 +1,109 @@ +#!/usr/bin/env node +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import path from 'node:path'; + +const root = process.cwd(); +const candidates = []; + +const readPrefix = async (filePath, maxBytes) => { + try { + const handle = await fs.open(filePath, 'r'); + try { + const { size } = await handle.stat(); + const readBytes = Math.min(size, maxBytes); + const buffer = Buffer.alloc(readBytes); + await handle.read(buffer, 0, readBytes, 0); + return buffer.toString('utf8'); + } finally { + await handle.close(); + } + } catch { + return ''; + } +}; + +const readSuffix = async (filePath, maxBytes) => { + try { + const handle = await fs.open(filePath, 'r'); + try { + const { size } = await handle.stat(); + const readBytes = Math.min(size, maxBytes); + const buffer = Buffer.alloc(readBytes); + const start = Math.max(0, size - readBytes); + await handle.read(buffer, 0, readBytes, start); + return buffer.toString('utf8'); + } finally { + await handle.close(); + } + } catch { + return ''; + } +}; + +const isFailureLog = async (filePath) => { + const prefix = await readPrefix(filePath, 4096); + if (/\bexit:\s*[1-9]\d*/i.test(prefix)) return true; + if (/\bFailed:/i.test(prefix) || /\buncaughtException\b/i.test(prefix)) return true; + const suffix = await readSuffix(filePath, 8192); + if (/\bFailed:/i.test(suffix) || /\buncaughtException\b/i.test(suffix)) return true; + return false; +}; + +const addCandidate = async (filePath) => { + try { + const stat = await fs.stat(filePath); + if (!stat.isFile()) return; + candidates.push({ path: filePath, mtimeMs: stat.mtimeMs }); + } catch { + // ignore missing or unreadable paths + } +}; + +const collectLogs = async (dirPath) => { + let entries; + try { + entries = await fs.readdir(dirPath, { withFileTypes: true }); + } catch { + return; + } + for (const entry of entries) { + const nextPath = path.join(dirPath, entry.name); + if (entry.isDirectory()) { + await collectLogs(nextPath); + continue; + } + if (!entry.isFile()) continue; + if (!entry.name.toLowerCase().endsWith('.log')) continue; + await addCandidate(nextPath); + } +}; + +const searchRoots = [ + path.join(root, 'tests', '.logs'), + path.join(root, 'benchmarks', 'results') +]; + +for (const dirPath of searchRoots) { + await collectLogs(dirPath); +} + +if (!candidates.length) { + console.error('No log files found.'); + process.exit(1); +} + +const failures = []; +for (const entry of candidates) { + if (await isFailureLog(entry.path)) { + failures.push(entry); + } +} + +const pick = (list) => list.sort((a, b) => b.mtimeMs - a.mtimeMs)[0]; +const selected = failures.length ? 
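// Sketch of how a harness might consume this tool: run it and read the newest
// failing (or, failing that, newest) log path from stdout. The script exits
// non-zero when no logs exist, so the call is wrapped in a try/catch.
import { execFileSync } from 'node:child_process';

let lastLogPath = null;
try {
  lastLogPath = execFileSync(process.execPath, ['tools/get-last-failure.js'], {
    encoding: 'utf8'
  }).trim();
} catch {
  // no .log files found under tests/.logs or benchmarks/results
}
console.log(lastLogPath ?? 'no log found');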
pick(failures) : pick(candidates); +if (!selected || !fsSync.existsSync(selected.path)) { + console.error('No log files found.'); + process.exit(1); +} +console.log(selected.path); diff --git a/tools/git-hooks.js b/tools/git-hooks.js index 23b24eb5d..9c983f485 100644 --- a/tools/git-hooks.js +++ b/tools/git-hooks.js @@ -2,14 +2,19 @@ import fs from 'node:fs/promises'; import fsSync from 'node:fs'; import path from 'node:path'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; import { resolveRepoRoot } from './dict-utils.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['install', 'uninstall', 'status'], - string: ['hooks', 'repo'], - default: { install: false, uninstall: false, status: false } -}); +const argv = createCli({ + scriptName: 'git-hooks', + options: { + install: { type: 'boolean', default: false }, + uninstall: { type: 'boolean', default: false }, + status: { type: 'boolean', default: false }, + hooks: { type: 'string' }, + repo: { type: 'string' } + } +}).parse(); const rootArg = argv.repo ? path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); @@ -31,7 +36,7 @@ ROOT="$(git rev-parse --show-toplevel 2>/dev/null)" if [ -z "$ROOT" ]; then exit 0 fi -node "$ROOT/build_index.js" --incremental +node "$ROOT/bin/pairofcleats.js" build-index --incremental --repo "$ROOT" `; const ensureHooksDir = async () => { diff --git a/tools/gtags-ingest.js b/tools/gtags-ingest.js new file mode 100644 index 000000000..ffadb7a19 --- /dev/null +++ b/tools/gtags-ingest.js @@ -0,0 +1,135 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import readline from 'node:readline'; +import { spawn } from 'node:child_process'; +import { createCli } from '../src/shared/cli.js'; +import { getRepoCacheRoot, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'gtags-ingest', + options: { + repo: { type: 'string' }, + input: { type: 'string' }, + out: { type: 'string' }, + json: { type: 'boolean', default: false }, + run: { type: 'boolean', default: false }, + global: { type: 'string', default: 'global' }, + args: { type: 'string' } + } +}).parse(); + +const repoRoot = argv.repo ? path.resolve(argv.repo) : resolveRepoRoot(process.cwd()); +const userConfig = loadUserConfig(repoRoot); +const cacheRoot = getRepoCacheRoot(repoRoot, userConfig); +const outputPath = argv.out + ? path.resolve(argv.out) + : path.join(cacheRoot, 'gtags', 'gtags.jsonl'); +const metaPath = `${outputPath}.meta.json`; +const inputPath = argv.input ? String(argv.input) : null; +const runGlobal = argv.run === true; +const globalCmd = argv.global || 'global'; + +const toPosix = (value) => value.replace(/\\/g, '/'); +const normalizePath = (value) => { + if (!value) return null; + const raw = String(value); + const resolved = path.isAbsolute(raw) ? 
raw : path.resolve(repoRoot, raw); + const rel = path.relative(repoRoot, resolved); + return toPosix(rel || raw); +}; + +const stats = { + entries: 0, + errors: 0 +}; + +const ensureOutputDir = async () => { + await fsPromises.mkdir(path.dirname(outputPath), { recursive: true }); +}; + +const writeStream = fs.createWriteStream(outputPath, { encoding: 'utf8' }); + +const parseGlobalLine = (line) => { + const trimmed = line.trim(); + if (!trimmed) return null; + const parts = trimmed.split(/\s+/); + if (parts.length < 3) return null; + const name = parts[0]; + const lineNo = Number.parseInt(parts[1], 10); + const file = normalizePath(parts.slice(2).join(' ')); + if (!name || !file || !Number.isFinite(lineNo)) return null; + return { file, name, line: lineNo }; +}; + +const ingestTextLines = async (stream) => { + const rl = readline.createInterface({ input: stream, crlfDelay: Infinity }); + for await (const line of rl) { + const parsed = parseGlobalLine(line); + if (!parsed) { + if (line.trim()) stats.errors += 1; + continue; + } + stats.entries += 1; + const payload = { + file: parsed.file, + ext: path.extname(parsed.file).toLowerCase(), + name: parsed.name, + startLine: parsed.line, + endLine: parsed.line, + role: 'definition', + source: 'gtags' + }; + writeStream.write(`${JSON.stringify(payload)}\n`); + } +}; + +const runGlobalCommand = async () => { + const args = ['-x']; + if (argv.args) { + const extra = String(argv.args) + .split(/\s+/) + .map((entry) => entry.trim()) + .filter(Boolean); + args.push(...extra); + } + const child = spawn(globalCmd, args, { cwd: repoRoot, stdio: ['ignore', 'pipe', 'pipe'] }); + child.stderr.on('data', (chunk) => process.stderr.write(chunk)); + await ingestTextLines(child.stdout); + const exitCode = await new Promise((resolve) => { + child.on('close', (code) => resolve(code ?? 0)); + }); + if (exitCode !== 0) { + throw new Error(`global exited with code ${exitCode}`); + } +}; + +await ensureOutputDir(); +if (runGlobal) { + await runGlobalCommand(); +} else if (inputPath && inputPath !== '-') { + const inputStream = fs.createReadStream(inputPath, { encoding: 'utf8' }); + await ingestTextLines(inputStream); +} else { + await ingestTextLines(process.stdin); +} + +writeStream.end(); + +const summary = { + generatedAt: new Date().toISOString(), + repoRoot: path.resolve(repoRoot), + input: inputPath || (runGlobal ? 'global' : 'stdin'), + output: path.resolve(outputPath), + stats +}; +await fsPromises.writeFile(metaPath, JSON.stringify(summary, null, 2)); + +if (argv.json) { + console.log(JSON.stringify(summary, null, 2)); +} else { + console.log(`GTAGS ingest: ${stats.entries} entries (${stats.errors} parse errors)`); + console.log(`- output: ${outputPath}`); + console.log(`- meta: ${metaPath}`); +} diff --git a/tools/index-validate.js b/tools/index-validate.js new file mode 100644 index 000000000..3e486a467 --- /dev/null +++ b/tools/index-validate.js @@ -0,0 +1,81 @@ +#!/usr/bin/env node +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { createCli } from '../src/shared/cli.js'; +import { resolveRepoRoot } from './dict-utils.js'; +import { validateIndexArtifacts } from '../src/index/validate.js'; + +const parseModes = (raw) => { + const tokens = String(raw || '') + .split(/[,\s]+/) + .map((token) => token.trim()) + .filter(Boolean); + const modeSet = new Set(tokens.length ? 
tokens : ['code', 'prose']); + if (modeSet.has('all')) return ['code', 'prose', 'records']; + return Array.from(modeSet); +}; + +async function runCli() { + const argv = createCli({ + scriptName: 'index-validate', + options: { + json: { type: 'boolean', default: false }, + repo: { type: 'string' }, + mode: { type: 'string' }, + 'index-root': { type: 'string' } + } + }).parse(); + + const rootArg = argv.repo ? path.resolve(argv.repo) : null; + const root = rootArg || resolveRepoRoot(process.cwd()); + const indexRoot = argv['index-root'] ? path.resolve(argv['index-root']) : null; + const modes = parseModes(argv.mode); + const report = await validateIndexArtifacts({ root, indexRoot, modes }); + + if (argv.json) { + console.log(JSON.stringify(report, null, 2)); + process.exit(report.ok ? 0 : 1); + } + + console.log('Index validation'); + console.log(`- repo: ${report.root}`); + for (const mode of modes) { + const entry = report.modes[mode]; + const status = entry.ok ? 'ok' : 'missing'; + console.log(`- ${mode}: ${status} (${entry.path})`); + if (entry.missing.length) { + console.log(` - missing: ${entry.missing.join(', ')}`); + } + if (entry.warnings.length) { + console.log(` - optional: ${entry.warnings.join(', ')}`); + } + } + if (report.sqlite.enabled) { + const status = report.sqlite.ok ? 'ok' : 'issues'; + console.log(`- sqlite: ${status} (mode=${report.sqlite.mode})`); + if (report.sqlite.issues.length) { + report.sqlite.issues.forEach((issue) => console.log(` - ${issue}`)); + } + } + + if (report.warnings.length && report.ok) { + console.log('Warnings:'); + report.warnings.forEach((warning) => console.log(`- ${warning}`)); + } + if (!report.ok) { + console.log('Issues:'); + report.issues.forEach((issue) => console.log(`- ${issue}`)); + } + if (report.hints?.length) { + console.log('Hints:'); + report.hints.forEach((hint) => console.log(`- ${hint}`)); + } + process.exit(report.ok ? 0 : 1); +} + +if (process.argv[1] === fileURLToPath(import.meta.url)) { + runCli().catch((err) => { + console.error(err?.message || err); + process.exit(1); + }); +} diff --git a/tools/indexer-service.js b/tools/indexer-service.js new file mode 100644 index 000000000..eb7ab1c90 --- /dev/null +++ b/tools/indexer-service.js @@ -0,0 +1,396 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { spawn } from 'node:child_process'; +import { createCli } from '../src/shared/cli.js'; +import { resolveRepoRoot, getCacheRoot, getRepoCacheRoot, resolveToolRoot } from './dict-utils.js'; +import { getServiceConfigPath, loadServiceConfig, resolveRepoRegistry } from './service/config.js'; +import { ensureQueueDir, enqueueJob, claimNextJob, completeJob, queueSummary, resolveQueueName, requeueStaleJobs, touchJobHeartbeat } from './service/queue.js'; +import { ensureRepo, resolveRepoPath } from './service/repos.js'; + +const argv = createCli({ + scriptName: 'indexer-service', + options: { + config: { type: 'string' }, + repo: { type: 'string' }, + mode: { type: 'string', default: 'both' }, + reason: { type: 'string' }, + stage: { type: 'string' }, + command: { type: 'string' }, + watch: { type: 'boolean', default: false }, + interval: { type: 'number' }, + concurrency: { type: 'number' }, + queue: { type: 'string', default: 'index' } + } +}).parse(); + +const command = argv.command || String(argv._[0] || ''); +const parsedUv = Number(process.env.UV_THREADPOOL_SIZE); +const effectiveUvThreadpoolSize = Number.isFinite(parsedUv) && parsedUv > 0 ? 
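// Sketch of driving the index validator from another script: request JSON output
// and treat a non-zero exit as "index incomplete". With --json the report is
// printed to stdout even when validation fails, so it can still be parsed from the
// thrown error's captured stdout.
import { execFileSync } from 'node:child_process';

const readValidationReport = (repoPath) => {
  const args = ['tools/index-validate.js', '--json', '--repo', repoPath];
  try {
    return JSON.parse(execFileSync(process.execPath, args, { encoding: 'utf8' }));
  } catch (err) {
    const raw = err && err.stdout ? String(err.stdout) : '';
    return raw ? JSON.parse(raw) : { ok: false, issues: ['no report produced'] };
  }
};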
Math.floor(parsedUv) : null; +if (command === 'serve' || argv.watch) { + console.error(`[indexer-service] UV_THREADPOOL_SIZE: ${effectiveUvThreadpoolSize ?? 'default'}`); +} + +const configPath = getServiceConfigPath(argv.config || null); +const config = loadServiceConfig(configPath); +const repoEntries = resolveRepoRegistry(config, configPath); +const baseDir = config.baseDir + ? path.resolve(config.baseDir) + : path.join(getCacheRoot(), 'service', 'repos'); +const queueDir = config.queueDir + ? path.resolve(config.queueDir) + : path.join(getCacheRoot(), 'service', 'queue'); +const queueName = argv.queue || 'index'; +const resolvedQueueName = resolveQueueName(queueName, { + reason: queueName === 'embeddings' ? 'embeddings' : null, + stage: argv.stage || null, + mode: argv.mode || null +}); + +const resolveRepoEntry = (repoArg) => { + if (!repoArg) return null; + const resolved = path.resolve(repoArg); + return repoEntries.find((entry) => resolveRepoPath(entry, baseDir) === resolved) + || repoEntries.find((entry) => entry.id === repoArg) + || { id: repoArg, path: resolved, syncPolicy: 'none' }; +}; + +const formatJobId = () => `${Date.now()}-${Math.random().toString(16).slice(2, 10)}`; + +const toolRoot = resolveToolRoot(); + +const BUILD_STATE_FILE = 'build_state.json'; +const BUILD_STATE_POLL_MS = 5000; +const BUILD_STATE_LOOKBACK_MS = 5 * 60 * 1000; + +const resolveBuildsRoot = (repoCacheRoot) => path.join(repoCacheRoot, 'builds'); + +const readBuildState = async (buildRoot) => { + if (!buildRoot) return null; + const statePath = path.join(buildRoot, BUILD_STATE_FILE); + try { + const raw = await fsPromises.readFile(statePath, 'utf8'); + const parsed = JSON.parse(raw); + return parsed && typeof parsed === 'object' ? { state: parsed, path: statePath } : null; + } catch { + return null; + } +}; + +const listBuildStateCandidates = async (repoCacheRoot) => { + const buildsRoot = resolveBuildsRoot(repoCacheRoot); + let entries; + try { + entries = await fsPromises.readdir(buildsRoot, { withFileTypes: true }); + } catch { + return []; + } + const candidates = []; + for (const entry of entries) { + if (!entry.isDirectory()) continue; + const buildRoot = path.join(buildsRoot, entry.name); + const statePath = path.join(buildRoot, BUILD_STATE_FILE); + try { + const stat = await fsPromises.stat(statePath); + candidates.push({ buildRoot, statePath, mtimeMs: stat.mtimeMs }); + } catch {} + } + return candidates.sort((a, b) => b.mtimeMs - a.mtimeMs); +}; + +const pickBuildState = async (repoCacheRoot, stage, sinceMs) => { + const candidates = await listBuildStateCandidates(repoCacheRoot); + for (const candidate of candidates) { + if (Number.isFinite(sinceMs) && candidate.mtimeMs < sinceMs) continue; + const loaded = await readBuildState(candidate.buildRoot); + if (!loaded) continue; + const state = loaded.state; + if (stage && state?.stage && state.stage !== stage) continue; + if (stage && state?.phases?.[stage]?.status === 'failed') continue; + return { buildRoot: candidate.buildRoot, state: loaded.state, path: loaded.path }; + } + return null; +}; + +const formatDuration = (ms) => { + const total = Math.max(0, Math.floor(ms / 1000)); + const hours = Math.floor(total / 3600); + const minutes = Math.floor((total % 3600) / 60); + const seconds = total % 60; + if (hours > 0) return `${hours}h ${minutes}m ${seconds}s`; + if (minutes > 0) return `${minutes}m ${seconds}s`; + return `${seconds}s`; +}; + +const formatProgressLine = ({ jobId, stage, state }) => { + if (!state) return null; + const phases = 
state?.phases || {}; + const phase = stage ? phases?.[stage] : null; + const phaseOrder = ['discovery', 'preprocessing', stage, 'validation', 'promote'].filter(Boolean); + const activePhase = phaseOrder.find((name) => phases?.[name]?.status === 'running'); + const startedAtRaw = phase?.startedAt || state?.createdAt || null; + const startedAt = startedAtRaw ? Date.parse(startedAtRaw) : null; + const now = Date.now(); + const elapsedMs = Number.isFinite(startedAt) ? Math.max(0, now - startedAt) : null; + const progress = state?.progress || {}; + let processedTotal = 0; + let totalFiles = 0; + const modeParts = []; + for (const [mode, data] of Object.entries(progress)) { + const processed = Number(data?.processedFiles); + const total = Number(data?.totalFiles); + if (!Number.isFinite(processed) || !Number.isFinite(total) || total <= 0) continue; + processedTotal += processed; + totalFiles += total; + modeParts.push(`${mode} ${processed}/${total}`); + } + const etaMs = (elapsedMs && processedTotal > 0 && totalFiles > processedTotal) + ? ((totalFiles - processedTotal) / (processedTotal / (elapsedMs / 1000))) * 1000 + : null; + const elapsedText = elapsedMs !== null ? formatDuration(elapsedMs) : 'n/a'; + const etaText = Number.isFinite(etaMs) ? formatDuration(etaMs) : 'n/a'; + const status = phase?.status || state?.stage || 'running'; + const progressText = modeParts.length + ? modeParts.join(' | ') + : 'progress pending'; + const phaseNote = activePhase && activePhase !== stage ? ` | phase ${activePhase} running` : ''; + return `[indexer] job ${jobId} ${stage || state?.stage || 'stage'} ${status} | ${progressText}${phaseNote} | elapsed ${elapsedText} | eta ${etaText}`; +}; + +const startBuildProgressMonitor = ({ job, repoPath, stage }) => { + if (!job || !repoPath) return () => {}; + const repoCacheRoot = getRepoCacheRoot(repoPath); + const startedAt = Date.now(); + let active = null; + let waitingLogged = false; + let lastLine = ''; + const poll = async () => { + if (!active) { + active = await pickBuildState(repoCacheRoot, stage, startedAt - BUILD_STATE_LOOKBACK_MS); + } + if (!active) { + if (!waitingLogged) { + console.log(`[indexer] job ${job.id} ${stage || 'stage'} running; waiting for build state...`); + waitingLogged = true; + } + return; + } + const loaded = await readBuildState(active.buildRoot); + if (loaded?.state) active.state = loaded.state; + const line = formatProgressLine({ jobId: job.id, stage, state: active.state }); + if (line && line !== lastLine) { + console.log(line); + lastLine = line; + } + }; + const timer = setInterval(() => { + void poll(); + }, BUILD_STATE_POLL_MS); + void poll(); + return () => clearInterval(timer); +}; + +const spawnWithLog = (args, extraEnv = {}, logPath = null) => new Promise((resolve) => { + const useLog = typeof logPath === 'string' && logPath.trim(); + const stdio = useLog ? ['ignore', 'pipe', 'pipe'] : 'inherit'; + const child = spawn(process.execPath, args, { stdio, env: { ...process.env, ...extraEnv } }); + let stream = null; + if (useLog) { + fs.mkdirSync(path.dirname(logPath), { recursive: true }); + stream = fs.createWriteStream(logPath, { flags: 'a' }); + stream.write(`[${new Date().toISOString()}] job start\n`); + child.stdout.pipe(stream); + child.stderr.pipe(stream); + } + child.on('close', (code) => { + if (stream) { + stream.write(`[${new Date().toISOString()}] job exit ${code ?? 1}\n`); + stream.end(); + } + resolve(code ?? 
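// Worked example of the ETA estimate in formatProgressLine above, with illustrative
// numbers: 120 of 480 files processed after 60s is 2 files/s, so the remaining 360
// files are estimated at 180s.
const exampleProcessed = 120;
const exampleTotal = 480;
const exampleElapsedMs = 60_000;
const exampleEtaMs = ((exampleTotal - exampleProcessed)
  / (exampleProcessed / (exampleElapsedMs / 1000))) * 1000;
console.log(formatDuration(exampleElapsedMs)); // "1m 0s"
console.log(formatDuration(exampleEtaMs));     // "3m 0s"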
1); + }); +}); + +const runBuildIndex = (repoPath, mode, stage, extraArgs = null, logPath = null) => { + const buildPath = path.join(toolRoot, 'build_index.js'); + const args = [buildPath]; + if (Array.isArray(extraArgs) && extraArgs.length) { + args.push(...extraArgs); + } else { + args.push('--repo', repoPath); + if (mode && mode !== 'both') args.push('--mode', mode); + if (stage) args.push('--stage', stage); + } + return spawnWithLog(args, {}, logPath); +}; + +const runBuildEmbeddings = (repoPath, mode, extraEnv = {}, logPath = null) => { + const buildPath = path.join(toolRoot, 'tools', 'build-embeddings.js'); + const args = [buildPath, '--repo', repoPath]; + if (mode && mode !== 'both') args.push('--mode', mode); + return spawnWithLog(args, extraEnv, logPath); +}; + +const handleSync = async () => { + const targets = argv.repo ? [resolveRepoEntry(argv.repo)].filter(Boolean) : repoEntries; + if (!targets.length) { + console.error('No repos configured for sync.'); + process.exit(1); + } + const policy = config.sync?.policy || 'pull'; + const results = []; + for (const entry of targets) { + const result = await ensureRepo(entry, baseDir, policy); + results.push({ id: entry.id || entry.path, ...result }); + } + console.log(JSON.stringify({ ok: true, results }, null, 2)); +}; + +const handleEnqueue = async () => { + const target = resolveRepoEntry(argv.repo || resolveRepoRoot(process.cwd())); + if (!target) { + console.error('Repo not found for enqueue.'); + process.exit(1); + } + await ensureQueueDir(queueDir); + const queueConfig = queueName === 'embeddings' + ? (config.embeddings?.queue || {}) + : (config.queue || {}); + const id = formatJobId(); + const mode = argv.mode || 'both'; + const result = await enqueueJob(queueDir, { + id, + createdAt: new Date().toISOString(), + repo: resolveRepoPath(target, baseDir) || target.path, + mode, + reason: argv.reason || null, + stage: argv.stage || null, + maxRetries: queueConfig.maxRetries ?? null + }, queueConfig.maxQueued ?? null, queueName); + if (!result.ok) { + console.error(result.message || 'Failed to enqueue job.'); + process.exit(1); + } + console.log(JSON.stringify({ ok: true, job: result.job }, null, 2)); +}; + +const handleStatus = async () => { + const summary = await queueSummary(queueDir, resolvedQueueName); + console.log(JSON.stringify({ ok: true, queue: summary, name: resolvedQueueName }, null, 2)); +}; + +const processQueueOnce = async (metrics) => { + const queueConfig = queueName === 'embeddings' + ? (config.embeddings?.queue || {}) + : (config.queue || {}); + await requeueStaleJobs(queueDir, resolvedQueueName, { + maxRetries: Number.isFinite(queueConfig.maxRetries) ? queueConfig.maxRetries : 2 + }); + const job = await claimNextJob(queueDir, resolvedQueueName); + if (!job) return false; + metrics.processed += 1; + const embedWorkerConfig = config.embeddings?.worker || {}; + const memoryMb = Number.isFinite(Number(embedWorkerConfig.maxMemoryMb)) + ? Math.max(128, Math.floor(Number(embedWorkerConfig.maxMemoryMb))) + : null; + const extraEnv = memoryMb + ? { NODE_OPTIONS: `${process.env.NODE_OPTIONS || ''} --max-old-space-size=${memoryMb}`.trim() } + : {}; + const heartbeat = setInterval(() => { + void touchJobHeartbeat(queueDir, job.id, resolvedQueueName); + }, 30000); + const logPath = job.logPath || path.join(queueDir, 'logs', `${job.id}.log`); + const stopProgress = queueName === 'index' + ? startBuildProgressMonitor({ job, repoPath: job.repo, stage: job.stage }) + : () => {}; + const exitCode = queueName === 'embeddings' + ? 
await runBuildEmbeddings(job.repo, job.mode, extraEnv, logPath) + : await runBuildIndex(job.repo, job.mode, job.stage, job.args, logPath); + stopProgress(); + clearInterval(heartbeat); + const status = exitCode === 0 ? 'done' : 'failed'; + const attempts = Number.isFinite(job.attempts) ? job.attempts : 0; + const maxRetries = Number.isFinite(job.maxRetries) + ? job.maxRetries + : (Number.isFinite(queueConfig.maxRetries) ? queueConfig.maxRetries : 0); + if (status === 'failed' && maxRetries > attempts) { + const nextAttempts = attempts + 1; + metrics.retried += 1; + await completeJob( + queueDir, + job.id, + 'queued', + { exitCode, retry: true, attempts: nextAttempts, error: `exit ${exitCode}` }, + resolvedQueueName + ); + return true; + } + if (status === 'done') { + metrics.succeeded += 1; + } else { + metrics.failed += 1; + } + await completeJob(queueDir, job.id, status, { exitCode, error: `exit ${exitCode}` }, resolvedQueueName); + return true; +}; + +const handleWork = async () => { + await ensureQueueDir(queueDir); + const workerConfig = queueName === 'embeddings' + ? (config.embeddings?.worker || {}) + : (config.worker || {}); + const concurrency = Number.isFinite(Number(argv.concurrency)) + ? Math.max(1, Number(argv.concurrency)) + : (workerConfig.concurrency || 1); + const intervalMs = Number.isFinite(Number(argv.interval)) + ? Math.max(100, Number(argv.interval)) + : (config.sync?.intervalMs || 5000); + const runBatch = async () => { + const metrics = { processed: 0, succeeded: 0, failed: 0, retried: 0 }; + const workers = Array.from({ length: concurrency }, async () => { + let worked = true; + while (worked) { + worked = await processQueueOnce(metrics); + } + }); + await Promise.all(workers); + if (metrics.processed) { + console.log(JSON.stringify({ + ok: true, + queue: resolvedQueueName, + metrics, + at: new Date().toISOString() + }, null, 2)); + } + }; + await runBatch(); + if (argv.watch) { + while (true) { + await new Promise((resolve) => setTimeout(resolve, intervalMs)); + await runBatch(); + } + } +}; + +const handleServe = async () => { + const apiPath = path.join(toolRoot, 'tools', 'api-server.js'); + const repoArg = argv.repo ? path.resolve(argv.repo) : resolveRepoRoot(process.cwd()); + const child = spawn(process.execPath, [apiPath, '--repo', repoArg], { stdio: 'inherit' }); + child.on('exit', (code) => process.exit(code ?? 0)); +}; + +if (command === 'sync') { + await handleSync(); +} else if (command === 'enqueue') { + await handleEnqueue(); +} else if (command === 'work') { + await handleWork(); +} else if (command === 'status') { + await handleStatus(); +} else if (command === 'serve') { + await handleServe(); +} else { + console.error('Usage: indexer-service [--queue index|embeddings] [--stage stage1|stage2|stage3|stage4]'); + process.exit(1); +} diff --git a/tools/lsif-ingest.js b/tools/lsif-ingest.js new file mode 100644 index 000000000..eed864c39 --- /dev/null +++ b/tools/lsif-ingest.js @@ -0,0 +1,188 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import readline from 'node:readline'; +import { createCli } from '../src/shared/cli.js'; +import { getRepoCacheRoot, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'lsif-ingest', + options: { + repo: { type: 'string' }, + input: { type: 'string' }, + out: { type: 'string' }, + json: { type: 'boolean', default: false } + } +}).parse(); + +const repoRoot = argv.repo ? 
path.resolve(argv.repo) : resolveRepoRoot(process.cwd()); +const userConfig = loadUserConfig(repoRoot); +const cacheRoot = getRepoCacheRoot(repoRoot, userConfig); +const inputPath = argv.input ? String(argv.input) : null; +const outputPath = argv.out + ? path.resolve(argv.out) + : path.join(cacheRoot, 'lsif', 'lsif.jsonl'); +const metaPath = `${outputPath}.meta.json`; + +const toPosix = (value) => value.replace(/\\/g, '/'); +const normalizePath = (value) => { + if (!value) return null; + let raw = String(value); + const posixRaw = raw.replace(/\\/g, '/'); + if (posixRaw === '/repo') return ''; + if (posixRaw.startsWith('/repo/')) { + return posixRaw.slice('/repo/'.length); + } + if (posixRaw.startsWith('/') && /^[A-Za-z]:\//.test(posixRaw.slice(1))) { + raw = posixRaw.slice(1); + } + const resolved = path.isAbsolute(raw) ? raw : path.resolve(repoRoot, raw); + const rel = path.relative(repoRoot, resolved); + return toPosix(rel || raw); +}; + +const stats = { + vertices: 0, + edges: 0, + definitions: 0, + references: 0, + errors: 0, + kinds: {}, + languages: {} +}; + +const bump = (bucket, key) => { + if (!key) return; + const k = String(key); + bucket[k] = (bucket[k] || 0) + 1; +}; + +const ensureOutputDir = async () => { + await fsPromises.mkdir(path.dirname(outputPath), { recursive: true }); +}; + +const writeStream = fs.createWriteStream(outputPath, { encoding: 'utf8' }); + +const vertexById = new Map(); +const docById = new Map(); +const rangeById = new Map(); +const rangeToDoc = new Map(); + +const normalizeRange = (range) => { + if (!range || typeof range !== 'object') return null; + const start = range.start || {}; + const end = range.end || {}; + const startLine = Number.isFinite(Number(start.line)) ? Number(start.line) + 1 : null; + const endLine = Number.isFinite(Number(end.line)) ? Number(end.line) + 1 : startLine; + return { + startLine, + endLine, + startChar: Number.isFinite(Number(start.character)) ? Number(start.character) : null, + endChar: Number.isFinite(Number(end.character)) ? Number(end.character) : null + }; +}; + +const recordEntry = (payload) => { + writeStream.write(`${JSON.stringify(payload)}\n`); +}; + +const handleVertex = (vertex) => { + vertexById.set(vertex.id, vertex); + const label = vertex.label || vertex.type || null; + bump(stats.kinds, label || 'unknown'); + if (label === 'document' && vertex.uri) { + docById.set(vertex.id, vertex); + } + if (label === 'range') { + rangeById.set(vertex.id, vertex); + } + stats.vertices += 1; +}; + +const handleEdge = (edge) => { + stats.edges += 1; + const label = edge.label || edge.type || null; + if (label === 'contains' && edge.outV != null && Array.isArray(edge.inVs)) { + const outVertex = vertexById.get(edge.outV); + if (outVertex && (outVertex.label === 'document' || outVertex.type === 'document')) { + for (const id of edge.inVs) { + rangeToDoc.set(id, outVertex); + } + } + } + if (label === 'item' && edge.outV != null && Array.isArray(edge.inVs)) { + const doc = rangeToDoc.get(edge.outV) || null; + const docUri = doc?.uri || null; + const file = docUri ? normalizePath(new URL(docUri).pathname) : null; + if (!file) return; + const range = rangeById.get(edge.outV); + const normalized = normalizeRange(range); + for (const inV of edge.inVs) { + const inVertex = vertexById.get(inV); + const inLabel = inVertex?.label || inVertex?.type || null; + const role = inLabel === 'definitionResult' ? 'definition' + : inLabel === 'referenceResult' ? 
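// Illustrative inputs for normalizePath above. LSIF document URIs arrive as URL
// pathnames; container-style "/repo/..." prefixes are stripped directly, while
// Windows-style "/C:/..." pathnames drop the leading slash and are resolved
// against repoRoot. The repoRoot value below is an assumption for the sketch.
//   normalizePath('/repo/src/index.js')          // => 'src/index.js'
//   normalizePath('/repo')                       // => ''
//   // with repoRoot === 'C:\\work\\proj' on Windows:
//   normalizePath('/C:/work/proj/src/index.js')  // => 'src/index.js'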
'reference' + : 'other'; + if (role === 'definition') stats.definitions += 1; + if (role === 'reference') stats.references += 1; + bump(stats.languages, doc?.languageId || 'unknown'); + recordEntry({ + file, + ext: path.extname(file).toLowerCase(), + name: range?.tag || range?.text || null, + kind: range?.kind || null, + startLine: normalized?.startLine ?? null, + endLine: normalized?.endLine ?? null, + startChar: normalized?.startChar ?? null, + endChar: normalized?.endChar ?? null, + role, + language: doc?.languageId || null + }); + } + } +}; + +const ingestJsonLines = async (stream) => { + const rl = readline.createInterface({ input: stream, crlfDelay: Infinity }); + for await (const line of rl) { + const trimmed = line.trim(); + if (!trimmed) continue; + let parsed = null; + try { + parsed = JSON.parse(trimmed); + } catch { + stats.errors += 1; + continue; + } + if (parsed && parsed.type === 'vertex') handleVertex(parsed); + else if (parsed && parsed.type === 'edge') handleEdge(parsed); + } +}; + +await ensureOutputDir(); +if (inputPath && inputPath !== '-') { + const inputStream = fs.createReadStream(inputPath, { encoding: 'utf8' }); + await ingestJsonLines(inputStream); +} else { + await ingestJsonLines(process.stdin); +} + +writeStream.end(); + +const summary = { + generatedAt: new Date().toISOString(), + repoRoot: path.resolve(repoRoot), + input: inputPath || 'stdin', + output: path.resolve(outputPath), + stats +}; +await fsPromises.writeFile(metaPath, JSON.stringify(summary, null, 2)); + +if (argv.json) { + console.log(JSON.stringify(summary, null, 2)); +} else { + console.log(`LSIF ingest: ${stats.vertices} vertices, ${stats.edges} edges`); + console.log(`- output: ${outputPath}`); + console.log(`- meta: ${metaPath}`); +} diff --git a/tools/map-iso-serve.js b/tools/map-iso-serve.js new file mode 100644 index 000000000..5e5950e36 --- /dev/null +++ b/tools/map-iso-serve.js @@ -0,0 +1,178 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import path from 'node:path'; +import https from 'node:https'; +import { spawnSync, spawn } from 'node:child_process'; +import { fileURLToPath } from 'node:url'; +import { createCli } from '../src/shared/cli.js'; +import selfsigned from 'selfsigned'; + +const argv = createCli({ + scriptName: 'map-iso', + options: { + repo: { type: 'string', describe: 'Repo root.' }, + dir: { type: 'string', describe: 'Alias for --repo.' }, + out: { type: 'string', describe: 'Output HTML path.' }, + port: { type: 'number', default: 0, describe: 'HTTPS port (0 for random).' }, + 'open-uri-template': { type: 'string', describe: 'URI template for double-click.' }, + 'three-url': { type: 'string', describe: 'Override three.js module URL.' }, + 'cert-dir': { type: 'string', describe: 'Directory for TLS key/cert.' }, + open: { type: 'boolean', default: true, describe: 'Open browser.' } + } +}).parse(); + +const toolRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..'); +const repoRoot = argv.repo ? path.resolve(argv.repo) + : (argv.dir ? path.resolve(argv.dir) : process.cwd()); +const mapsDir = path.join(repoRoot, '.pairofcleats', 'maps'); +const outPath = argv.out ? path.resolve(argv.out) : path.join(mapsDir, 'map.iso.html'); +const threeUrl = argv['three-url'] || '/three/three.module.js'; +const certDir = argv['cert-dir'] ? path.resolve(argv['cert-dir']) : path.join(mapsDir, '.certs'); +const port = Number.isFinite(argv.port) ? 
argv.port : 0; + +const ensureDir = (targetPath) => { + fs.mkdirSync(targetPath, { recursive: true }); +}; + +const ensureCert = (targetDir) => { + ensureDir(targetDir); + const keyPath = path.join(targetDir, 'localhost.key'); + const certPath = path.join(targetDir, 'localhost.crt'); + if (fs.existsSync(keyPath) && fs.existsSync(certPath)) { + return { key: fs.readFileSync(keyPath), cert: fs.readFileSync(certPath) }; + } + const attrs = [{ name: 'commonName', value: 'localhost' }]; + const pems = selfsigned.generate(attrs, { days: 30, keySize: 2048 }); + fs.writeFileSync(keyPath, pems.private); + fs.writeFileSync(certPath, pems.cert); + return { key: pems.private, cert: pems.cert }; +}; + +const runReport = () => { + ensureDir(path.dirname(outPath)); + const args = [ + path.join(toolRoot, 'tools', 'report-code-map.js'), + '--repo', repoRoot, + '--format', 'html-iso', + '--out', outPath, + '--three-url', threeUrl + ]; + if (argv['open-uri-template']) { + args.push('--open-uri-template', argv['open-uri-template']); + } + const result = spawnSync(process.execPath, args, { cwd: toolRoot, stdio: 'inherit' }); + if (result.status !== 0) { + process.exit(result.status ?? 1); + } +}; + +const contentTypeFor = (filePath) => { + const ext = path.extname(filePath).toLowerCase(); + if (ext === '.html') return 'text/html; charset=utf-8'; + if (ext === '.js') return 'application/javascript; charset=utf-8'; + if (ext === '.json') return 'application/json; charset=utf-8'; + if (ext === '.map') return 'application/json; charset=utf-8'; + if (ext === '.jpg' || ext === '.jpeg') return 'image/jpeg'; + if (ext === '.png') return 'image/png'; + if (ext === '.hdr') return 'application/octet-stream'; + return 'application/octet-stream'; +}; + +const safeJoin = (baseDir, requestPath) => { + const safePath = path.normalize(path.join(baseDir, requestPath)); + if (!safePath.startsWith(baseDir)) return null; + return safePath; +}; + +const openBrowser = (url) => { + if (argv.open === false) return; + if (process.platform === 'win32') { + spawn('cmd', ['/c', 'start', '', url], { detached: true, stdio: 'ignore' }); + return; + } + const opener = process.platform === 'darwin' ? 
'open' : 'xdg-open'; + spawn(opener, [url], { detached: true, stdio: 'ignore' }); +}; + +runReport(); + +const { key, cert } = ensureCert(certDir); +const threeRoot = path.join(toolRoot, 'node_modules', 'three'); +const threeBuildRoot = path.join(threeRoot, 'build'); +const threeExamplesRoot = path.join(threeRoot, 'examples'); +const isomapAssetsRoot = path.join(toolRoot, 'assets', 'isomap'); +const isomapClientRoot = path.join(toolRoot, 'src', 'map', 'isometric', 'client'); + +const server = https.createServer({ key, cert }, (req, res) => { + const url = new URL(req.url || '/', 'https://localhost'); + const pathname = decodeURIComponent(url.pathname || '/'); + if (pathname === '/' || pathname === '/map.iso.html') { + const htmlPath = outPath; + if (!fs.existsSync(htmlPath)) { + res.writeHead(404); + res.end('map.iso.html not found.'); + return; + } + res.writeHead(200, { 'Content-Type': contentTypeFor(htmlPath) }); + fs.createReadStream(htmlPath).pipe(res); + return; + } + if (pathname.startsWith('/three/examples/')) { + const relativePath = pathname.replace('/three/examples/', ''); + const targetPath = safeJoin(threeExamplesRoot, relativePath); + if (!targetPath || !fs.existsSync(targetPath)) { + res.writeHead(404); + res.end('three.js example asset not found.'); + return; + } + res.writeHead(200, { 'Content-Type': contentTypeFor(targetPath) }); + fs.createReadStream(targetPath).pipe(res); + return; + } + if (pathname.startsWith('/three/')) { + const relativePath = pathname.replace('/three/', ''); + const targetPath = safeJoin(threeBuildRoot, relativePath); + if (!targetPath || !fs.existsSync(targetPath)) { + res.writeHead(404); + res.end('three.js asset not found.'); + return; + } + res.writeHead(200, { 'Content-Type': contentTypeFor(targetPath) }); + fs.createReadStream(targetPath).pipe(res); + return; + } + if (pathname.startsWith('/assets/isomap/')) { + const relativePath = pathname.replace('/assets/isomap/', ''); + const targetPath = safeJoin(isomapAssetsRoot, relativePath); + if (!targetPath || !fs.existsSync(targetPath)) { + res.writeHead(404); + res.end('isomap asset not found.'); + return; + } + res.writeHead(200, { 'Content-Type': contentTypeFor(targetPath) }); + fs.createReadStream(targetPath).pipe(res); + return; + } + if (pathname.startsWith('/isomap/')) { + const relativePath = pathname.replace('/isomap/', ''); + const targetPath = safeJoin(isomapClientRoot, relativePath); + if (!targetPath || !fs.existsSync(targetPath)) { + res.writeHead(404); + res.end('isomap client asset not found.'); + return; + } + res.writeHead(200, { 'Content-Type': contentTypeFor(targetPath) }); + fs.createReadStream(targetPath).pipe(res); + return; + } + res.writeHead(404); + res.end('Not found.'); +}); + +server.listen(port, '127.0.0.1', () => { + const address = server.address(); + const actualPort = typeof address === 'object' && address ? 
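// Sketch of fetching the served page programmatically. ensureCert above issues a
// self-signed certificate, so a client has to opt out of CA verification (or trust
// the generated .crt explicitly). The port below is an assumption; the actual port
// is printed by the listen callback.
import https from 'node:https';

https.get('https://localhost:8443/map.iso.html', { rejectUnauthorized: false }, (res) => {
  console.log('map page status:', res.statusCode);
  res.resume();
});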
address.port : port; + const url = `https://localhost:${actualPort}/map.iso.html`; + console.log(`Serving map: ${url}`); + openBrowser(url); +}); diff --git a/tools/mcp-server.js b/tools/mcp-server.js index bfcacb7a5..88f52c364 100644 --- a/tools/mcp-server.js +++ b/tools/mcp-server.js @@ -1,1284 +1,58 @@ #!/usr/bin/env node import fs from 'node:fs'; import path from 'node:path'; -import { fileURLToPath } from 'node:url'; -import { spawn, spawnSync } from 'node:child_process'; -import simpleGit from 'simple-git'; -import { getToolDefs } from '../src/mcp/defs.js'; -import { sendError, sendNotification, sendResult } from '../src/mcp/protocol.js'; -import { - DEFAULT_MODEL_ID, - getCacheRoot, - getDictConfig, - getDictionaryPaths, - getIndexDir, - getMetricsDir, - getModelConfig, - getRepoCacheRoot, - getRepoId, - loadUserConfig, - resolveRepoRoot, - resolveSqlitePaths -} from './dict-utils.js'; -import { getVectorExtensionConfig, resolveVectorExtensionPath } from './vector-extension.js'; +import { getToolDefs } from '../src/integrations/mcp/defs.js'; +import { DEFAULT_MODEL_ID, getRuntimeConfig, loadUserConfig, resolveRepoRoot, resolveToolRoot } from './dict-utils.js'; +import { parseTimeoutMs, resolveToolTimeoutMs } from './mcp/repo.js'; +import { handleToolCall } from './mcp/tools.js'; +import { createMcpTransport } from './mcp/transport.js'; +import { configureServiceLogger } from './service/logger.js'; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); -const ROOT = path.resolve(__dirname, '..'); -const PKG = JSON.parse(fs.readFileSync(path.join(ROOT, 'package.json'), 'utf8')); +const toolRoot = resolveToolRoot(); +const PKG = JSON.parse(fs.readFileSync(path.join(toolRoot, 'package.json'), 'utf8')); const TOOL_DEFS = getToolDefs(DEFAULT_MODEL_ID); - -/** - * Resolve and validate a repo path. - * @param {string} inputPath - * @returns {string} - */ -function resolveRepoPath(inputPath) { - const base = inputPath ? path.resolve(inputPath) : process.cwd(); - if (!fs.existsSync(base) || !fs.statSync(base).isDirectory()) { - throw new Error(`Repo path not found: ${base}`); - } - return inputPath ? base : resolveRepoRoot(base); -} - -/** - * Build the artifact path map for a repo. 
- * @param {string} repoPath - * @param {object} userConfig - * @returns {object} - */ -function listArtifacts(repoPath, userConfig) { - const indexCode = getIndexDir(repoPath, 'code', userConfig); - const indexProse = getIndexDir(repoPath, 'prose', userConfig); - const indexRecords = getIndexDir(repoPath, 'records', userConfig); - const metricsDir = getMetricsDir(repoPath, userConfig); - const sqlitePaths = resolveSqlitePaths(repoPath, userConfig); - return { - index: { - code: { - dir: indexCode, - chunkMeta: path.join(indexCode, 'chunk_meta.json'), - tokenPostings: path.join(indexCode, 'token_postings.json') - }, - prose: { - dir: indexProse, - chunkMeta: path.join(indexProse, 'chunk_meta.json'), - tokenPostings: path.join(indexProse, 'token_postings.json') - }, - records: { - dir: indexRecords, - chunkMeta: path.join(indexRecords, 'chunk_meta.json'), - tokenPostings: path.join(indexRecords, 'token_postings.json') - } - }, - metrics: { - dir: metricsDir, - indexCode: path.join(metricsDir, 'index-code.json'), - indexProse: path.join(metricsDir, 'index-prose.json'), - indexRecords: path.join(metricsDir, 'index-records.json'), - queryCache: path.join(metricsDir, 'queryCache.json') - }, - sqlite: { - code: sqlitePaths.codePath, - prose: sqlitePaths.prosePath, - legacy: sqlitePaths.legacyPath, - legacyExists: sqlitePaths.legacyExists - } - }; -} - -/** - * Stat a path if it exists. - * @param {string} target - * @returns {{exists:boolean,mtime:(string|null),bytes:number}} - */ -function statIfExists(target) { - try { - const stat = fs.statSync(target); - return { - exists: true, - mtime: stat.mtime ? stat.mtime.toISOString() : null, - bytes: stat.size - }; - } catch { - return { exists: false, mtime: null, bytes: 0 }; - } -} - -/** - * Fetch lightweight git status info for a repo. - * @param {string} repoPath - * @returns {Promise} - */ -async function getGitInfo(repoPath) { - const gitDir = path.join(repoPath, '.git'); - const hasGitDir = fs.existsSync(gitDir); - if (!hasGitDir) { - return { - isRepo: false, - warning: 'Git repository not detected; using path-based repo identity.' - }; - } - try { - const git = simpleGit(repoPath); - const status = await git.status(); - const head = await git.revparse(['HEAD']); - return { - isRepo: true, - head: head.trim(), - branch: status.current || null, - isDirty: status.files.length > 0 - }; - } catch (error) { - return { - isRepo: true, - warning: `Git detected but status unavailable: ${error.message}` - }; - } -} - -/** - * Build an index status report for the MCP tool. 
- * @param {object} [args] - * @returns {Promise} - */ -async function indexStatus(args = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const userConfig = loadUserConfig(repoPath); - const cacheRoot = (userConfig.cache && userConfig.cache.root) || process.env.PAIROFCLEATS_CACHE_ROOT || getCacheRoot(); - const repoId = getRepoId(repoPath); - const repoCacheRoot = getRepoCacheRoot(repoPath, userConfig); - const dictConfig = getDictConfig(repoPath, userConfig); - const dictPaths = await getDictionaryPaths(repoPath, dictConfig); - const modelConfig = getModelConfig(repoPath, userConfig); - const modelsDir = modelConfig.dir; - const modelDirName = `models--${modelConfig.id.replace('/', '--')}`; - const modelPath = path.join(modelsDir, modelDirName); - - const artifacts = listArtifacts(repoPath, userConfig); - const git = await getGitInfo(repoPath); - const incrementalRoot = path.join(repoCacheRoot, 'incremental'); - const report = { - repoPath, - repoId, - cacheRoot, - repoCacheRoot, - git, - dictionaries: { - dir: dictConfig.dir, - files: dictPaths, - enabled: dictPaths.length > 0, - includeSlang: dictConfig.includeSlang - }, - models: { - dir: modelsDir, - model: modelConfig.id, - available: fs.existsSync(modelPath), - hint: fs.existsSync(modelPath) - ? null - : 'Run the download_models tool or `npm run download-models` to prefetch embeddings.' - }, - incremental: { - dir: incrementalRoot, - exists: fs.existsSync(incrementalRoot) - }, - index: { - code: { - dir: artifacts.index.code.dir, - chunkMeta: statIfExists(artifacts.index.code.chunkMeta), - tokenPostings: statIfExists(artifacts.index.code.tokenPostings) - }, - prose: { - dir: artifacts.index.prose.dir, - chunkMeta: statIfExists(artifacts.index.prose.chunkMeta), - tokenPostings: statIfExists(artifacts.index.prose.tokenPostings) - }, - records: { - dir: artifacts.index.records.dir, - chunkMeta: statIfExists(artifacts.index.records.chunkMeta), - tokenPostings: statIfExists(artifacts.index.records.tokenPostings) - } - }, - sqlite: { - code: { path: artifacts.sqlite.code, ...statIfExists(artifacts.sqlite.code) }, - prose: { path: artifacts.sqlite.prose, ...statIfExists(artifacts.sqlite.prose) }, - legacy: artifacts.sqlite.legacyExists ? artifacts.sqlite.legacy : null - }, - metrics: { - dir: artifacts.metrics.dir, - indexCode: statIfExists(artifacts.metrics.indexCode), - indexProse: statIfExists(artifacts.metrics.indexProse), - indexRecords: statIfExists(artifacts.metrics.indexRecords), - queryCache: statIfExists(artifacts.metrics.queryCache) - } - }; - - return report; -} - -/** - * Inspect configuration + cache status with warnings. 
- * @param {object} [args] - * @returns {Promise} - */ -async function configStatus(args = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const userConfig = loadUserConfig(repoPath); - const cacheRoot = (userConfig.cache && userConfig.cache.root) || process.env.PAIROFCLEATS_CACHE_ROOT || getCacheRoot(); - const repoCacheRoot = getRepoCacheRoot(repoPath, userConfig); - const dictConfig = getDictConfig(repoPath, userConfig); - const dictionaryPaths = await getDictionaryPaths(repoPath, dictConfig); - const modelConfig = getModelConfig(repoPath, userConfig); - const modelsDir = modelConfig.dir; - const modelDirName = `models--${modelConfig.id.replace('/', '--')}`; - const modelPath = path.join(modelsDir, modelDirName); - const sqlitePaths = resolveSqlitePaths(repoPath, userConfig); - const sqliteConfigured = userConfig.sqlite?.use !== false; - const vectorConfig = getVectorExtensionConfig(repoPath, userConfig); - const vectorPath = resolveVectorExtensionPath(vectorConfig); - - const warnings = []; - if (!dictionaryPaths.length && (dictConfig.languages.length || dictConfig.files.length || dictConfig.includeSlang || dictConfig.enableRepoDictionary)) { - warnings.push({ - code: 'dictionary_missing', - message: 'No dictionary files found; identifier splitting will be limited.' - }); - } - if (!fs.existsSync(modelPath)) { - warnings.push({ - code: 'model_missing', - message: `Embedding model not found (${modelConfig.id}). Run npm run download-models.` - }); - } - if (sqliteConfigured) { - const missing = []; - if (!fs.existsSync(sqlitePaths.codePath)) missing.push(`code=${sqlitePaths.codePath}`); - if (!fs.existsSync(sqlitePaths.prosePath)) missing.push(`prose=${sqlitePaths.prosePath}`); - if (missing.length) { - warnings.push({ - code: 'sqlite_missing', - message: `SQLite indexes missing (${missing.join(', ')}). Run npm run build-sqlite-index.` - }); - } - } - if (vectorConfig.enabled) { - if (!vectorPath || !fs.existsSync(vectorPath)) { - warnings.push({ - code: 'extension_missing', - message: 'SQLite vector extension is enabled but not installed.' - }); - } - } - - return { - repoPath, - repoId: getRepoId(repoPath), - config: { - cacheRoot, - repoCacheRoot, - dictionary: dictConfig, - models: modelConfig, - sqlite: { - use: sqliteConfigured, - annMode: userConfig.sqlite?.annMode || null, - codeDbPath: sqlitePaths.codePath, - proseDbPath: sqlitePaths.prosePath - }, - search: userConfig.search || {}, - indexing: userConfig.indexing || {}, - tooling: userConfig.tooling || {} - }, - cache: { - cacheRootExists: fs.existsSync(cacheRoot), - repoCacheExists: fs.existsSync(repoCacheRoot), - dictionaries: dictionaryPaths, - modelAvailable: fs.existsSync(modelPath), - sqlite: { - codeExists: fs.existsSync(sqlitePaths.codePath), - proseExists: fs.existsSync(sqlitePaths.prosePath) - }, - vectorExtension: { - enabled: vectorConfig.enabled, - path: vectorPath, - available: !!(vectorPath && fs.existsSync(vectorPath)) - } - }, - warnings - }; -} - -/** - * Run a node command and return stdout. - * @param {string} cwd - * @param {string[]} args - * @returns {string} - */ -function runNodeSync(cwd, args) { - const result = spawnSync(process.execPath, args, { cwd, encoding: 'utf8' }); - if (result.status !== 0) { - const err = result.stderr || `Command failed: ${args.join(' ')}`; - throw new Error(err.trim()); - } - return result.stdout || ''; -} - -/** - * Normalize meta filters into CLI-friendly key/value strings. 
- * @param {any} meta - * @returns {string[]|null} - */ -function normalizeMetaFilters(meta) { - if (!meta) return null; - if (Array.isArray(meta)) { - const entries = meta.flatMap((entry) => { - if (entry == null) return []; - if (typeof entry === 'string') return [entry]; - if (typeof entry === 'object') { - return Object.entries(entry).map(([key, value]) => - value == null || value === '' ? String(key) : `${key}=${value}` - ); - } - return [String(entry)]; - }); - return entries.length ? entries : null; - } - if (typeof meta === 'object') { - const entries = Object.entries(meta).map(([key, value]) => - value == null || value === '' ? String(key) : `${key}=${value}` - ); - return entries.length ? entries : null; - } - return [String(meta)]; -} - -/** - * Build a line buffer for progress streaming. - * @param {(line:string)=>void} onLine - * @returns {{push:(text:string)=>void,flush:()=>void}} - */ -function createLineBuffer(onLine) { - let buffer = ''; - return { - push(text) { - buffer += text; - const lines = buffer.split(/\r?\n/); - buffer = lines.pop() || ''; - for (const line of lines) { - const trimmed = line.trim(); - if (trimmed) onLine(trimmed); - } - }, - flush() { - const trimmed = buffer.trim(); - if (trimmed) onLine(trimmed); - buffer = ''; - } - }; -} - -/** - * Run a node command asynchronously with optional stderr streaming. - * @param {string} cwd - * @param {string[]} args - * @param {{streamOutput?:boolean,onLine?:(payload:{stream:string,line:string})=>void}} [options] - * @returns {Promise<{stdout:string,stderr:string}>} - */ -function runNodeAsync(cwd, args, options = {}) { - return new Promise((resolve, reject) => { - const child = spawn(process.execPath, args, { cwd }); - let stdout = ''; - let stderr = ''; - const streamOutput = options.streamOutput === true; - const onLine = typeof options.onLine === 'function' ? options.onLine : null; - const stdoutBuffer = onLine - ? createLineBuffer((line) => onLine({ stream: 'stdout', line })) - : null; - const stderrBuffer = onLine - ? createLineBuffer((line) => onLine({ stream: 'stderr', line })) - : null; - child.stdout?.on('data', (chunk) => { - const text = chunk.toString(); - stdout += text; - if (streamOutput) process.stderr.write(text); - stdoutBuffer?.push(text); - }); - child.stderr?.on('data', (chunk) => { - const text = chunk.toString(); - stderr += text; - if (streamOutput) process.stderr.write(text); - stderrBuffer?.push(text); - }); - child.on('error', (err) => { - const error = new Error(err.message || 'Command failed'); - error.stdout = stdout; - error.stderr = stderr; - reject(error); - }); - child.on('close', (code) => { - stdoutBuffer?.flush(); - stderrBuffer?.flush(); - if (code === 0) { - resolve({ stdout, stderr }); - return; - } - const error = new Error(stderr.trim() || `Command failed: ${args.join(' ')}`); - error.code = code; - error.stdout = stdout; - error.stderr = stderr; - reject(error); - }); - }); -} - -/** - * Run a tool script with progress notifications. - * @param {{repoPath:string,scriptArgs:string[],context?:object,startMessage?:string,doneMessage?:string}} input - * @returns {Promise} - */ -async function runToolWithProgress({ repoPath, scriptArgs, context = {}, startMessage, doneMessage }) { - const progress = typeof context.progress === 'function' ? context.progress : null; - const progressLine = progress - ? 
({ stream, line }) => progress({ message: line, stream }) - : null; - if (progress && startMessage) { - progress({ message: startMessage, phase: 'start' }); - } - const { stdout } = await runNodeAsync(repoPath, scriptArgs, { - streamOutput: true, - onLine: progressLine - }); - if (progress && doneMessage) { - progress({ message: doneMessage, phase: 'done' }); - } - return stdout || ''; -} - -function parseCountSummary(stdout) { - const match = String(stdout || '').match(/downloaded=(\d+)\s+skipped=(\d+)/i); - if (!match) return null; - return { - downloaded: Number(match[1]), - skipped: Number(match[2]) - }; -} - -function parseExtensionPath(stdout) { - const match = String(stdout || '').match(/Extension present at (.+)$/im); - return match ? match[1].trim() : null; -} - -/** - * Format error payloads for tool responses. - * @param {any} error - * @returns {{message:string,code?:number,stderr?:string,stdout?:string}} - */ -function getRemediationHint(error) { - const parts = [error?.message, error?.stderr, error?.stdout] - .filter(Boolean) - .join('\n') - .toLowerCase(); - if (!parts) return null; - - if (parts.includes('sqlite backend requested but index not found') - || parts.includes('missing required tables')) { - return 'Run `npm run build-sqlite-index` or set sqlite.use=false / --backend memory.'; - } - if (parts.includes('better-sqlite3 is required')) { - return 'Run `npm install` and ensure better-sqlite3 can load on this platform.'; - } - if (parts.includes('chunk_meta.json') || parts.includes('minhash_signatures')) { - return 'Run `npm run build-index` (or `npm run setup`/`npm run bootstrap`) to generate indexes.'; - } - if ((parts.includes('model') || parts.includes('xenova') || parts.includes('transformers')) - && (parts.includes('not found') || parts.includes('failed') || parts.includes('fetch') || parts.includes('download') || parts.includes('enoent'))) { - return 'Run `npm run download-models` or use `--stub-embeddings` / `PAIROFCLEATS_EMBEDDINGS=stub`.'; - } - if (parts.includes('dictionary') - || parts.includes('wordlist') - || parts.includes('words_alpha') - || parts.includes('download-dicts')) { - return 'Run `npm run download-dicts -- --lang en` (or configure dictionary.files/languages).'; - } - return null; -} - -/** - * Format error payloads for tool responses. - * @param {any} error - * @returns {{message:string,code?:number,stderr?:string,stdout?:string,hint?:string}} - */ -function formatToolError(error) { - const payload = { - message: error?.message || String(error) - }; - if (error?.code !== undefined) payload.code = error.code; - if (error?.stderr) payload.stderr = String(error.stderr).trim(); - if (error?.stdout) payload.stdout = String(error.stdout).trim(); - const hint = getRemediationHint(error); - if (hint) payload.hint = hint; - return payload; -} - -/** - * Emit a progress notification for long-running tools. - * @param {string|number|null} id - * @param {string} tool - * @param {{message:string,stream?:string,phase?:string}} payload - */ -function sendProgress(id, tool, payload) { - if (id === null || id === undefined) return; - const message = payload?.message ? String(payload.message) : ''; - if (!message) return; - sendNotification('notifications/progress', { - id, - tool, - message, - stream: payload?.stream || 'info', - phase: payload?.phase || 'progress', - ts: new Date().toISOString() - }); -} - -/** - * Restore CI artifacts if present. 
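For illustration, formatToolError layers a remediation hint on top of the raw failure fields. A sketch using the functions above (error values hypothetical):

const err = new Error('better-sqlite3 is required');
err.code = 1;
err.stderr = 'Error: better-sqlite3 is required';
formatToolError(err);
// => {
//      message: 'better-sqlite3 is required',
//      code: 1,
//      stderr: 'Error: better-sqlite3 is required',
//      hint: 'Run `npm install` and ensure better-sqlite3 can load on this platform.'
//    }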
- * @param {string} repoPath - * @param {string} artifactsDir - * @returns {boolean} - */ -function maybeRestoreArtifacts(repoPath, artifactsDir, progress) { - const fromDir = artifactsDir ? path.resolve(artifactsDir) : path.join(repoPath, 'ci-artifacts'); - if (!fs.existsSync(path.join(fromDir, 'manifest.json'))) return false; - if (progress) { - progress({ - message: `Restoring CI artifacts from ${fromDir}`, - phase: 'start' - }); - } - runNodeSync(repoPath, [path.join(ROOT, 'tools', 'ci-restore-artifacts.js'), '--from', fromDir]); - if (progress) { - progress({ - message: 'CI artifacts restored.', - phase: 'done' - }); - } - return true; -} - -/** - * Handle the MCP build_index tool call. - * @param {object} [args] - * @returns {object} - */ -async function buildIndex(args = {}, context = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const userConfig = loadUserConfig(repoPath); - const sqliteConfigured = userConfig.sqlite?.use !== false; - const shouldUseSqlite = typeof args.sqlite === 'boolean' ? args.sqlite : sqliteConfigured; - const mode = args.mode || 'all'; - const incremental = args.incremental === true; - const stubEmbeddings = args.stubEmbeddings === true; - const buildSqlite = shouldUseSqlite && mode !== 'records'; - const useArtifacts = args.useArtifacts === true; - const progress = typeof context.progress === 'function' ? context.progress : null; - const progressLine = progress - ? ({ stream, line }) => progress({ message: line, stream }) - : null; - - let restoredArtifacts = false; - if (useArtifacts) { - restoredArtifacts = maybeRestoreArtifacts(repoPath, args.artifactsDir, progress); - } - - if (!restoredArtifacts) { - if (progress) { - progress({ - message: `Building ${mode} index${incremental ? ' (incremental)' : ''}.`, - phase: 'start' - }); - } - const indexArgs = [path.join(ROOT, 'build_index.js')]; - if (mode && mode !== 'all') indexArgs.push('--mode', mode); - if (incremental) indexArgs.push('--incremental'); - if (stubEmbeddings) indexArgs.push('--stub-embeddings'); - await runNodeAsync(repoPath, indexArgs, { streamOutput: true, onLine: progressLine }); - } - - if (buildSqlite) { - if (progress) { - progress({ - message: `Building SQLite index${incremental ? ' (incremental)' : ''}.`, - phase: 'start' - }); - } - const sqliteArgs = [path.join(ROOT, 'tools', 'build-sqlite-index.js')]; - if (incremental) sqliteArgs.push('--incremental'); - await runNodeAsync(repoPath, sqliteArgs, { streamOutput: true, onLine: progressLine }); - } - if (progress) { - progress({ - message: 'Index build complete.', - phase: 'done' - }); - } - - return { - repoPath, - mode, - sqlite: buildSqlite, - incremental, - restoredArtifacts - }; -} - -/** - * Handle the MCP search tool call. - * @param {object} [args] - * @returns {object} - */ -function runSearch(args = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const query = String(args.query || '').trim(); - if (!query) throw new Error('Query is required.'); - const mode = args.mode || 'both'; - const backend = args.backend || null; - const output = typeof args.output === 'string' ? args.output.toLowerCase() : ''; - const ann = typeof args.ann === 'boolean' ? args.ann : null; - const top = Number.isFinite(Number(args.top)) ? Math.max(1, Number(args.top)) : null; - const context = Number.isFinite(Number(args.context)) ? Math.max(0, Number(args.context)) : null; - const typeFilter = args.type ? String(args.type) : null; - const authorFilter = args.author ? 
String(args.author) : null; - const importFilter = args.import ? String(args.import) : null; - const callsFilter = args.calls ? String(args.calls) : null; - const usesFilter = args.uses ? String(args.uses) : null; - const signatureFilter = args.signature ? String(args.signature) : null; - const paramFilter = args.param ? String(args.param) : null; - const decoratorFilter = args.decorator ? String(args.decorator) : null; - const inferredTypeFilter = args.inferredType ? String(args.inferredType) : null; - const returnTypeFilter = args.returnType ? String(args.returnType) : null; - const throwsFilter = args.throws ? String(args.throws) : null; - const readsFilter = args.reads ? String(args.reads) : null; - const writesFilter = args.writes ? String(args.writes) : null; - const mutatesFilter = args.mutates ? String(args.mutates) : null; - const aliasFilter = args.alias ? String(args.alias) : null; - const awaitsFilter = args.awaits ? String(args.awaits) : null; - const riskFilter = args.risk ? String(args.risk) : null; - const riskTagFilter = args.riskTag ? String(args.riskTag) : null; - const riskSourceFilter = args.riskSource ? String(args.riskSource) : null; - const riskSinkFilter = args.riskSink ? String(args.riskSink) : null; - const riskCategoryFilter = args.riskCategory ? String(args.riskCategory) : null; - const riskFlowFilter = args.riskFlow ? String(args.riskFlow) : null; - const branchesMin = Number.isFinite(Number(args.branchesMin)) ? Number(args.branchesMin) : null; - const loopsMin = Number.isFinite(Number(args.loopsMin)) ? Number(args.loopsMin) : null; - const breaksMin = Number.isFinite(Number(args.breaksMin)) ? Number(args.breaksMin) : null; - const continuesMin = Number.isFinite(Number(args.continuesMin)) ? Number(args.continuesMin) : null; - const churnMin = Number.isFinite(Number(args.churnMin)) ? Number(args.churnMin) : null; - const chunkAuthorFilter = args.chunkAuthor ? String(args.chunkAuthor) : null; - const modifiedAfter = args.modifiedAfter ? String(args.modifiedAfter) : null; - const modifiedSince = Number.isFinite(Number(args.modifiedSince)) ? Number(args.modifiedSince) : null; - const visibilityFilter = args.visibility ? String(args.visibility) : null; - const extendsFilter = args.extends ? String(args.extends) : null; - const lintFilter = args.lint === true; - const asyncFilter = args.async === true; - const generatorFilter = args.generator === true; - const returnsFilter = args.returns === true; - const fileFilters = []; - const toList = (value) => (Array.isArray(value) ? value : (value == null ? [] : [value])); - fileFilters.push(...toList(args.path)); - fileFilters.push(...toList(args.file)); - const extFilters = toList(args.ext); - const metaFilters = normalizeMetaFilters(args.meta); - const metaJson = args.metaJson || null; - - const useCompact = output !== 'full' && output !== 'json'; - const searchArgs = [path.join(ROOT, 'search.js'), query, useCompact ? 
'--json-compact' : '--json']; - if (mode && mode !== 'both') searchArgs.push('--mode', mode); - if (backend) searchArgs.push('--backend', backend); - if (ann === true) searchArgs.push('--ann'); - if (ann === false) searchArgs.push('--no-ann'); - if (top) searchArgs.push('-n', String(top)); - if (context !== null) searchArgs.push('--context', String(context)); - if (typeFilter) searchArgs.push('--type', typeFilter); - if (authorFilter) searchArgs.push('--author', authorFilter); - if (importFilter) searchArgs.push('--import', importFilter); - if (callsFilter) searchArgs.push('--calls', callsFilter); - if (usesFilter) searchArgs.push('--uses', usesFilter); - if (signatureFilter) searchArgs.push('--signature', signatureFilter); - if (paramFilter) searchArgs.push('--param', paramFilter); - if (decoratorFilter) searchArgs.push('--decorator', decoratorFilter); - if (inferredTypeFilter) searchArgs.push('--inferred-type', inferredTypeFilter); - if (returnTypeFilter) searchArgs.push('--return-type', returnTypeFilter); - if (throwsFilter) searchArgs.push('--throws', throwsFilter); - if (readsFilter) searchArgs.push('--reads', readsFilter); - if (writesFilter) searchArgs.push('--writes', writesFilter); - if (mutatesFilter) searchArgs.push('--mutates', mutatesFilter); - if (aliasFilter) searchArgs.push('--alias', aliasFilter); - if (awaitsFilter) searchArgs.push('--awaits', awaitsFilter); - if (riskFilter) searchArgs.push('--risk', riskFilter); - if (riskTagFilter) searchArgs.push('--risk-tag', riskTagFilter); - if (riskSourceFilter) searchArgs.push('--risk-source', riskSourceFilter); - if (riskSinkFilter) searchArgs.push('--risk-sink', riskSinkFilter); - if (riskCategoryFilter) searchArgs.push('--risk-category', riskCategoryFilter); - if (riskFlowFilter) searchArgs.push('--risk-flow', riskFlowFilter); - if (branchesMin !== null) searchArgs.push('--branches', String(branchesMin)); - if (loopsMin !== null) searchArgs.push('--loops', String(loopsMin)); - if (breaksMin !== null) searchArgs.push('--breaks', String(breaksMin)); - if (continuesMin !== null) searchArgs.push('--continues', String(continuesMin)); - if (churnMin !== null) searchArgs.push('--churn', String(churnMin)); - if (chunkAuthorFilter) searchArgs.push('--chunk-author', chunkAuthorFilter); - if (modifiedAfter) searchArgs.push('--modified-after', modifiedAfter); - if (modifiedSince !== null) searchArgs.push('--modified-since', String(modifiedSince)); - if (visibilityFilter) searchArgs.push('--visibility', visibilityFilter); - if (extendsFilter) searchArgs.push('--extends', extendsFilter); - if (lintFilter) searchArgs.push('--lint'); - if (asyncFilter) searchArgs.push('--async'); - if (generatorFilter) searchArgs.push('--generator'); - if (returnsFilter) searchArgs.push('--returns'); - for (const entry of fileFilters) { - if (entry == null || entry === '') continue; - searchArgs.push('--path', String(entry)); - } - for (const entry of extFilters) { - if (entry == null || entry === '') continue; - searchArgs.push('--ext', String(entry)); - } - if (Array.isArray(metaFilters)) { - metaFilters.forEach((entry) => searchArgs.push('--meta', entry)); - } - if (metaJson) { - const jsonValue = typeof metaJson === 'string' ? metaJson : JSON.stringify(metaJson); - searchArgs.push('--meta-json', jsonValue); - } - - const stdout = runNodeSync(repoPath, searchArgs); - return JSON.parse(stdout || '{}'); -} - -/** - * Handle the MCP download_models tool call. 
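As a concrete example of the flag mapping above, a search tool call expands into a one-shot CLI invocation (argument values hypothetical):

runSearch({ query: 'parse token', top: 5, path: 'src/', ext: '.js' });
// spawns: node search.js "parse token" --json-compact -n 5 --path src/ --ext .js
// and returns the parsed JSON results from stdout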
- * @param {object} [args] - * @returns {{model:string,output:string}} - */ -async function downloadModels(args = {}, context = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const userConfig = loadUserConfig(repoPath); - const modelConfig = getModelConfig(repoPath, userConfig); - const model = args.model || modelConfig.id || DEFAULT_MODEL_ID; - const scriptArgs = [path.join(ROOT, 'tools', 'download-models.js'), '--model', model]; - if (args.cacheDir) scriptArgs.push('--cache-dir', args.cacheDir); - const progress = typeof context.progress === 'function' ? context.progress : null; - const progressLine = progress - ? ({ stream, line }) => progress({ message: line, stream }) - : null; - if (progress) { - progress({ message: `Downloading model ${model}.`, phase: 'start' }); - } - const { stdout } = await runNodeAsync(repoPath, scriptArgs, { - streamOutput: true, - onLine: progressLine - }); - if (progress) { - progress({ message: `Model download complete (${model}).`, phase: 'done' }); - } - return { model, output: stdout.trim() }; -} - -/** - * Handle the MCP download_dictionaries tool call. - * @param {object} [args] - * @returns {Promise} - */ -async function downloadDictionaries(args = {}, context = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const scriptArgs = [path.join(ROOT, 'tools', 'download-dicts.js')]; - if (args.lang) scriptArgs.push('--lang', String(args.lang)); - const urls = Array.isArray(args.url) ? args.url : (args.url ? [args.url] : []); - urls.forEach((value) => scriptArgs.push('--url', String(value))); - if (args.dir) scriptArgs.push('--dir', String(args.dir)); - if (args.update === true) scriptArgs.push('--update'); - if (args.force === true) scriptArgs.push('--force'); - const stdout = await runToolWithProgress({ - repoPath, - scriptArgs, - context, - startMessage: 'Downloading dictionaries.', - doneMessage: 'Dictionary download complete.' - }); - const summary = parseCountSummary(stdout); - return { - repoPath, - output: stdout.trim(), - ...(summary || {}) - }; -} - -/** - * Handle the MCP download_extensions tool call. - * @param {object} [args] - * @returns {Promise} - */ -async function downloadExtensions(args = {}, context = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const scriptArgs = [path.join(ROOT, 'tools', 'download-extensions.js')]; - if (args.provider) scriptArgs.push('--provider', String(args.provider)); - if (args.dir) scriptArgs.push('--dir', String(args.dir)); - if (args.out) scriptArgs.push('--out', String(args.out)); - if (args.platform) scriptArgs.push('--platform', String(args.platform)); - if (args.arch) scriptArgs.push('--arch', String(args.arch)); - const urls = Array.isArray(args.url) ? args.url : (args.url ? [args.url] : []); - urls.forEach((value) => scriptArgs.push('--url', String(value))); - if (args.update === true) scriptArgs.push('--update'); - if (args.force === true) scriptArgs.push('--force'); - const stdout = await runToolWithProgress({ - repoPath, - scriptArgs, - context, - startMessage: 'Downloading extensions.', - doneMessage: 'Extension download complete.' - }); - const summary = parseCountSummary(stdout); - const resolvedPath = parseExtensionPath(stdout); - return { - repoPath, - output: stdout.trim(), - extensionPath: resolvedPath, - ...(summary || {}) - }; -} - -/** - * Handle the MCP verify_extensions tool call. 
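Both download handlers recover a structured summary from the script output via the parsers above; for example:

parseCountSummary('dictionaries: downloaded=2 skipped=5');
// => { downloaded: 2, skipped: 5 }
parseExtensionPath('Extension present at /tmp/extensions/vec0.so');
// => '/tmp/extensions/vec0.so' (path shown is hypothetical)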
- * @param {object} [args] - * @returns {object} - */ -function verifyExtensions(args = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const scriptArgs = [path.join(ROOT, 'tools', 'verify-extensions.js'), '--json']; - if (args.provider) scriptArgs.push('--provider', String(args.provider)); - if (args.dir) scriptArgs.push('--dir', String(args.dir)); - if (args.path) scriptArgs.push('--path', String(args.path)); - if (args.platform) scriptArgs.push('--platform', String(args.platform)); - if (args.arch) scriptArgs.push('--arch', String(args.arch)); - if (args.module) scriptArgs.push('--module', String(args.module)); - if (args.table) scriptArgs.push('--table', String(args.table)); - if (args.column) scriptArgs.push('--column', String(args.column)); - if (args.encoding) scriptArgs.push('--encoding', String(args.encoding)); - if (args.options) scriptArgs.push('--options', String(args.options)); - if (args.annMode) scriptArgs.push('--ann-mode', String(args.annMode)); - if (args.load === false) scriptArgs.push('--no-load'); - const stdout = runNodeSync(repoPath, scriptArgs); - try { - return JSON.parse(stdout || '{}'); - } catch { - return { repoPath, output: stdout.trim() }; - } -} - -/** - * Handle the MCP build_sqlite_index tool call. - * @param {object} [args] - * @returns {Promise} - */ -async function buildSqliteIndex(args = {}, context = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const scriptArgs = [path.join(ROOT, 'tools', 'build-sqlite-index.js')]; - if (args.mode) scriptArgs.push('--mode', String(args.mode)); - if (args.incremental === true) scriptArgs.push('--incremental'); - if (args.compact === true) scriptArgs.push('--compact'); - if (args.codeDir) scriptArgs.push('--code-dir', String(args.codeDir)); - if (args.proseDir) scriptArgs.push('--prose-dir', String(args.proseDir)); - if (args.out) scriptArgs.push('--out', String(args.out)); - const stdout = await runToolWithProgress({ - repoPath, - scriptArgs, - context, - startMessage: 'Building SQLite index.', - doneMessage: 'SQLite index build complete.' - }); - return { repoPath, output: stdout.trim() }; -} - -/** - * Handle the MCP compact_sqlite_index tool call. - * @param {object} [args] - * @returns {Promise} - */ -async function compactSqliteIndex(args = {}, context = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const scriptArgs = [path.join(ROOT, 'tools', 'compact-sqlite-index.js')]; - if (args.mode) scriptArgs.push('--mode', String(args.mode)); - if (args.dryRun === true) scriptArgs.push('--dry-run'); - if (args.keepBackup === true) scriptArgs.push('--keep-backup'); - const stdout = await runToolWithProgress({ - repoPath, - scriptArgs, - context, - startMessage: 'Compacting SQLite index.', - doneMessage: 'SQLite compaction complete.' - }); - return { repoPath, output: stdout.trim() }; -} - -/** - * Handle the MCP cache_gc tool call. 
- * @param {object} [args] - * @returns {object} - */ -function cacheGc(args = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const scriptArgs = [path.join(ROOT, 'tools', 'cache-gc.js'), '--json']; - if (args.dryRun === true) scriptArgs.push('--dry-run'); - if (Number.isFinite(Number(args.maxBytes))) scriptArgs.push('--max-bytes', String(args.maxBytes)); - if (Number.isFinite(Number(args.maxGb))) scriptArgs.push('--max-gb', String(args.maxGb)); - if (Number.isFinite(Number(args.maxAgeDays))) scriptArgs.push('--max-age-days', String(args.maxAgeDays)); - const stdout = runNodeSync(repoPath, scriptArgs); - try { - return JSON.parse(stdout || '{}'); - } catch { - return { repoPath, output: stdout.trim() }; - } -} - -/** - * Handle the MCP clean_artifacts tool call. - * @param {object} [args] - * @returns {Promise} - */ -async function cleanArtifacts(args = {}, context = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const scriptArgs = [path.join(ROOT, 'tools', 'clean-artifacts.js')]; - if (args.all === true) scriptArgs.push('--all'); - if (args.dryRun === true) scriptArgs.push('--dry-run'); - const stdout = await runToolWithProgress({ - repoPath, - scriptArgs, - context, - startMessage: 'Cleaning artifacts.', - doneMessage: 'Artifact cleanup complete.' - }); - return { repoPath, output: stdout.trim() }; -} - -/** - * Handle the MCP bootstrap tool call. - * @param {object} [args] - * @returns {Promise} - */ -async function runBootstrap(args = {}, context = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const scriptArgs = [path.join(ROOT, 'tools', 'bootstrap.js')]; - if (args.skipInstall === true) scriptArgs.push('--skip-install'); - if (args.skipDicts === true) scriptArgs.push('--skip-dicts'); - if (args.skipIndex === true) scriptArgs.push('--skip-index'); - if (args.skipArtifacts === true) scriptArgs.push('--skip-artifacts'); - if (args.skipTooling === true) scriptArgs.push('--skip-tooling'); - if (args.withSqlite === true) scriptArgs.push('--with-sqlite'); - if (args.incremental === true) scriptArgs.push('--incremental'); - const stdout = await runToolWithProgress({ - repoPath, - scriptArgs, - context, - startMessage: 'Bootstrapping repo.', - doneMessage: 'Bootstrap complete.' - }); - return { repoPath, output: stdout.trim() }; -} - -/** - * Handle the MCP report_artifacts tool call. - * @param {object} [args] - * @returns {object} - */ -function reportArtifacts(args = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const stdout = runNodeSync(repoPath, [path.join(ROOT, 'tools', 'report-artifacts.js'), '--json']); - return JSON.parse(stdout || '{}'); -} - -/** - * Handle the MCP triage_ingest tool call. - * @param {object} [args] - * @returns {Promise} - */ -async function triageIngest(args = {}, context = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const source = String(args.source || '').trim(); - const inputPath = String(args.inputPath || '').trim(); - if (!source || !inputPath) { - throw new Error('source and inputPath are required.'); - } - const resolvedInput = path.isAbsolute(inputPath) ? inputPath : path.join(repoPath, inputPath); - const metaFilters = normalizeMetaFilters(args.meta); - const ingestArgs = [path.join(ROOT, 'tools', 'triage', 'ingest.js'), '--source', source, '--in', resolvedInput]; - ingestArgs.push('--repo', repoPath); - if (Array.isArray(metaFilters)) { - metaFilters.forEach((entry) => ingestArgs.push('--meta', entry)); - } - const progress = typeof context.progress === 'function' ? 
context.progress : null; - const progressLine = progress - ? ({ stream, line }) => progress({ message: line, stream }) - : null; - if (progress) { - progress({ message: `Ingesting ${source} findings.`, phase: 'start' }); - } - const { stdout } = await runNodeAsync(repoPath, ingestArgs, { streamOutput: true, onLine: progressLine }); - let payload = {}; - try { - payload = JSON.parse(stdout || '{}'); - } catch (error) { - throw new Error(`Failed to parse ingest output: ${error?.message || error}`); - } - if (args.buildIndex) { - await buildIndex({ - repoPath, - mode: 'records', - incremental: args.incremental === true, - stubEmbeddings: args.stubEmbeddings === true, - sqlite: false - }, context); - } - if (progress) { - progress({ message: 'Triage ingest complete.', phase: 'done' }); - } - return payload; -} - -/** - * Handle the MCP triage_decision tool call. - * @param {object} [args] - * @returns {object} - */ -function triageDecision(args = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const finding = String(args.finding || '').trim(); - const status = String(args.status || '').trim(); - if (!finding || !status) { - throw new Error('finding and status are required.'); - } - const metaFilters = normalizeMetaFilters(args.meta); - const decisionArgs = [path.join(ROOT, 'tools', 'triage', 'decision.js'), '--finding', finding, '--status', status]; - decisionArgs.push('--repo', repoPath); - if (args.justification) decisionArgs.push('--justification', String(args.justification)); - if (args.reviewer) decisionArgs.push('--reviewer', String(args.reviewer)); - if (args.expires) decisionArgs.push('--expires', String(args.expires)); - if (Array.isArray(metaFilters)) { - metaFilters.forEach((entry) => decisionArgs.push('--meta', entry)); - } - const codes = Array.isArray(args.codes) ? args.codes : (args.codes ? [args.codes] : []); - const evidence = Array.isArray(args.evidence) ? args.evidence : (args.evidence ? [args.evidence] : []); - codes.filter(Boolean).forEach((code) => decisionArgs.push('--code', String(code))); - evidence.filter(Boolean).forEach((item) => decisionArgs.push('--evidence', String(item))); - const stdout = runNodeSync(repoPath, decisionArgs); - return JSON.parse(stdout || '{}'); -} - -/** - * Handle the MCP triage_context_pack tool call. - * @param {object} [args] - * @returns {Promise} - */ -async function triageContextPack(args = {}, context = {}) { - const repoPath = resolveRepoPath(args.repoPath); - const recordId = String(args.recordId || '').trim(); - if (!recordId) throw new Error('recordId is required.'); - const contextArgs = [path.join(ROOT, 'tools', 'triage', 'context-pack.js'), '--record', recordId]; - contextArgs.push('--repo', repoPath); - if (args.outPath) contextArgs.push('--out', String(args.outPath)); - if (args.ann === true) contextArgs.push('--ann'); - if (args.ann === false) contextArgs.push('--no-ann'); - if (args.stubEmbeddings === true) contextArgs.push('--stub-embeddings'); - const progress = typeof context.progress === 'function' ? context.progress : null; - const progressLine = progress - ? 
({ stream, line }) => progress({ message: line, stream }) - : null; - if (progress) { - progress({ message: 'Building triage context pack.', phase: 'start' }); - } - const { stdout } = await runNodeAsync(repoPath, contextArgs, { streamOutput: true, onLine: progressLine }); - if (progress) { - progress({ message: 'Context pack ready.', phase: 'done' }); - } - try { - return JSON.parse(stdout || '{}'); - } catch (error) { - throw new Error(`Failed to parse context pack output: ${error?.message || error}`); - } -} - -/** - * Dispatch an MCP tool call by name. - * @param {string} name - * @param {object} args - * @returns {Promise} - */ -async function handleToolCall(name, args, context = {}) { - switch (name) { - case 'index_status': - return await indexStatus(args); - case 'config_status': - return await configStatus(args); - case 'build_index': - return await buildIndex(args, context); - case 'search': - return runSearch(args); - case 'download_models': - return await downloadModels(args, context); - case 'download_dictionaries': - return await downloadDictionaries(args, context); - case 'download_extensions': - return await downloadExtensions(args, context); - case 'verify_extensions': - return verifyExtensions(args); - case 'build_sqlite_index': - return await buildSqliteIndex(args, context); - case 'compact_sqlite_index': - return await compactSqliteIndex(args, context); - case 'cache_gc': - return cacheGc(args); - case 'clean_artifacts': - return await cleanArtifacts(args, context); - case 'bootstrap': - return await runBootstrap(args, context); - case 'report_artifacts': - return reportArtifacts(args); - case 'triage_ingest': - return await triageIngest(args, context); - case 'triage_decision': - return triageDecision(args); - case 'triage_context_pack': - return await triageContextPack(args, context); - default: - throw new Error(`Unknown tool: ${name}`); - } -} - -/** - * Handle a JSON-RPC message from stdin. - * @param {object} message - * @returns {Promise} - */ -async function handleMessage(message) { - if (!message || message.jsonrpc !== '2.0') return; - const { id, method, params } = message; - - if (method === 'initialize') { - sendResult(id, { - protocolVersion: '2024-11-05', - serverInfo: { name: 'PairOfCleats', version: PKG.version }, - capabilities: { - tools: { listChanged: false }, - resources: { listChanged: false } - } - }); - return; - } - - if (method === 'shutdown') { - sendResult(id, {}); - return; - } - - if (method === 'exit') { - process.exit(0); - } - - if (method === 'tools/list') { - sendResult(id, { tools: TOOL_DEFS }); - return; - } - - if (method === 'resources/list') { - sendResult(id, { resources: [] }); - return; - } - - if (method === 'tools/call') { - if (!id) return; - const name = params?.name; - const args = params?.arguments || {}; - try { - const progress = (payload) => sendProgress(id, name, payload); - const result = await handleToolCall(name, args, { progress, toolCallId: id }); - sendResult(id, { - content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] - }); - } catch (error) { - const payload = formatToolError(error); - sendResult(id, { - content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }], - isError: true - }); - } - return; - } - - if (id) { - sendError(id, -32601, `Method not found: ${method}`); - } -} - -let buffer = Buffer.alloc(0); -let processing = false; -const queue = []; - -/** - * Process queued messages serially. 
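The stdin parser below expects LSP-style framing: a Content-Length header, a blank line, then the JSON-RPC body. A minimal client-side writer, sketched here for reference rather than taken from this patch, mirrors that contract:

function writeMessage(stream, message) {
  const body = JSON.stringify(message);
  // Content-Length counts bytes, not characters, so measure the UTF-8 body.
  stream.write(`Content-Length: ${Buffer.byteLength(body, 'utf8')}\r\n\r\n${body}`);
}
writeMessage(process.stdout, { jsonrpc: '2.0', id: 1, method: 'tools/list' });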
- */ -function processQueue() { - if (processing) return; - processing = true; - const run = async () => { - while (queue.length) { - const msg = queue.shift(); - await handleMessage(msg); - } - processing = false; - }; - run().catch((error) => { - processing = false; - console.error(error); - }); -} - -/** - * Enqueue a message for processing. - * @param {object} message - */ -function enqueueMessage(message) { - queue.push(message); - processQueue(); -} - -/** - * Parse framed JSON-RPC messages from the input buffer. - */ -function parseBuffer() { - while (true) { - const headerEnd = buffer.indexOf('\r\n\r\n'); - if (headerEnd === -1) return; - const header = buffer.slice(0, headerEnd).toString('utf8'); - const lengthMatch = header.match(/Content-Length:\s*(\d+)/i); - if (!lengthMatch) { - buffer = buffer.slice(headerEnd + 4); - continue; - } - const length = parseInt(lengthMatch[1], 10); - const total = headerEnd + 4 + length; - if (buffer.length < total) return; - const body = buffer.slice(headerEnd + 4, total).toString('utf8'); - buffer = buffer.slice(total); - try { - const msg = JSON.parse(body); - enqueueMessage(msg); - } catch {} - } -} - -process.stdin.on('data', (chunk) => { - buffer = Buffer.concat([buffer, chunk]); - parseBuffer(); +const DEFAULT_MCP_QUEUE_MAX = 64; +const DEFAULT_TOOL_TIMEOUT_MS = 120000; +const DEFAULT_TOOL_TIMEOUTS = { + build_index: 10 * 60 * 1000, + build_sqlite_index: 10 * 60 * 1000, + download_models: 10 * 60 * 1000, + download_dictionaries: 10 * 60 * 1000, + download_extensions: 10 * 60 * 1000, + bootstrap: 10 * 60 * 1000, + triage_ingest: 5 * 60 * 1000 +}; + +const envQueueMax = parseTimeoutMs(process.env.PAIROFCLEATS_MCP_QUEUE_MAX); +const envToolTimeoutMs = parseTimeoutMs(process.env.PAIROFCLEATS_MCP_TOOL_TIMEOUT_MS); +const baseConfigRoot = resolveRepoRoot(process.cwd()); +const baseConfig = loadUserConfig(baseConfigRoot); +const { logLine } = configureServiceLogger({ repoRoot: baseConfigRoot, service: 'mcp' }); +const runtimeConfig = getRuntimeConfig(baseConfigRoot, baseConfig); +const parsedUv = Number(process.env.UV_THREADPOOL_SIZE); +const effectiveUvThreadpoolSize = Number.isFinite(parsedUv) && parsedUv > 0 ? Math.floor(parsedUv) : null; +if (effectiveUvThreadpoolSize || runtimeConfig.uvThreadpoolSize) { + logLine(`[mcp] UV_THREADPOOL_SIZE: ${effectiveUvThreadpoolSize ?? 'default'} (config=${runtimeConfig.uvThreadpoolSize ?? 'none'})`); +} + +const baseMcpConfig = baseConfig?.mcp && typeof baseConfig.mcp === 'object' ? baseConfig.mcp : {}; +const configuredQueueMax = parseTimeoutMs(baseMcpConfig.queueMax); +const queueMax = Math.max(1, configuredQueueMax ?? envQueueMax ?? 
DEFAULT_MCP_QUEUE_MAX); + +const resolveTimeout = (name, args) => resolveToolTimeoutMs(name, args, { + envToolTimeoutMs, + defaultToolTimeoutMs: DEFAULT_TOOL_TIMEOUT_MS, + defaultToolTimeouts: DEFAULT_TOOL_TIMEOUTS }); -process.stdin.on('end', () => { - process.exit(0); +const transport = createMcpTransport({ + toolDefs: TOOL_DEFS, + serverInfo: { name: 'PairOfCleats', version: PKG.version }, + handleToolCall, + resolveToolTimeoutMs: resolveTimeout, + queueMax }); + +transport.start(); diff --git a/tools/mcp/repo.js b/tools/mcp/repo.js new file mode 100644 index 000000000..b9a1c42f4 --- /dev/null +++ b/tools/mcp/repo.js @@ -0,0 +1,360 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import simpleGit from 'simple-git'; +import { getEnvConfig } from '../../src/shared/env.js'; +import { createSqliteDbCache } from '../../src/retrieval/sqlite-cache.js'; +import { + getCacheRoot, + getDictConfig, + getDictionaryPaths, + getIndexDir, + getMetricsDir, + getModelConfig, + getRepoCacheRoot, + getRepoId, + loadUserConfig, + resolveRepoRoot, + resolveSqlitePaths +} from '../dict-utils.js'; +import { getVectorExtensionConfig, resolveVectorExtensionPath } from '../vector-extension.js'; + +const repoCaches = new Map(); + +export const getRepoCaches = (repoPath) => { + const key = repoPath || process.cwd(); + const existing = repoCaches.get(key); + if (existing) { + existing.lastUsed = Date.now(); + return existing; + } + const entry = { + indexCache: new Map(), + sqliteCache: createSqliteDbCache(), + lastUsed: Date.now() + }; + repoCaches.set(key, entry); + return entry; +}; + +export const clearRepoCaches = (repoPath) => { + if (!repoPath) return; + const entry = repoCaches.get(repoPath); + if (!entry) return; + entry.sqliteCache?.closeAll?.(); + entry.indexCache?.clear?.(); + repoCaches.delete(repoPath); +}; + +/** + * Resolve and validate a repo path. + * @param {string} inputPath + * @returns {string} + */ +export function resolveRepoPath(inputPath) { + const base = inputPath ? path.resolve(inputPath) : process.cwd(); + if (!fs.existsSync(base) || !fs.statSync(base).isDirectory()) { + throw new Error(`Repo path not found: ${base}`); + } + return inputPath ? base : resolveRepoRoot(base); +} + +const resolveConfigRoot = (args) => { + const candidate = args?.repoPath ? path.resolve(String(args.repoPath)) : null; + if (candidate && fs.existsSync(candidate) && fs.statSync(candidate).isDirectory()) { + return resolveRepoRoot(candidate); + } + return resolveRepoRoot(process.cwd()); +}; + +const resolveMcpConfig = (args) => { + const repoRoot = resolveConfigRoot(args); + const cfg = loadUserConfig(repoRoot); + return cfg?.mcp && typeof cfg.mcp === 'object' ? cfg.mcp : {}; +}; + +export const parseTimeoutMs = (value) => { + if (value == null || value === '') return null; + const parsed = Number(value); + return Number.isFinite(parsed) ? Math.max(0, Math.floor(parsed)) : null; +}; + +export const resolveToolTimeoutMs = (name, args, { envToolTimeoutMs, defaultToolTimeoutMs, defaultToolTimeouts }) => { + const mcpConfig = resolveMcpConfig(args); + const toolTimeouts = mcpConfig.toolTimeouts && typeof mcpConfig.toolTimeouts === 'object' + ? mcpConfig.toolTimeouts + : {}; + const override = parseTimeoutMs(toolTimeouts[name]); + const baseTimeout = parseTimeoutMs(mcpConfig.toolTimeoutMs ?? envToolTimeoutMs) + ?? defaultToolTimeouts[name] + ?? defaultToolTimeoutMs; + const resolved = override ?? baseTimeout; + return resolved && resolved > 0 ? 
resolved : null; +}; + +/** + * Build the artifact path map for a repo. + * @param {string} repoPath + * @param {object} userConfig + * @returns {object} + */ +function listArtifacts(repoPath, userConfig) { + const indexCode = getIndexDir(repoPath, 'code', userConfig); + const indexProse = getIndexDir(repoPath, 'prose', userConfig); + const indexRecords = getIndexDir(repoPath, 'records', userConfig); + const metricsDir = getMetricsDir(repoPath, userConfig); + const sqlitePaths = resolveSqlitePaths(repoPath, userConfig); + return { + index: { + code: { + dir: indexCode, + chunkMeta: path.join(indexCode, 'chunk_meta.json'), + tokenPostings: path.join(indexCode, 'token_postings.json') + }, + prose: { + dir: indexProse, + chunkMeta: path.join(indexProse, 'chunk_meta.json'), + tokenPostings: path.join(indexProse, 'token_postings.json') + }, + records: { + dir: indexRecords, + chunkMeta: path.join(indexRecords, 'chunk_meta.json'), + tokenPostings: path.join(indexRecords, 'token_postings.json') + } + }, + metrics: { + dir: metricsDir, + indexCode: path.join(metricsDir, 'index-code.json'), + indexProse: path.join(metricsDir, 'index-prose.json'), + indexRecords: path.join(metricsDir, 'index-records.json'), + queryCache: path.join(metricsDir, 'queryCache.json') + }, + sqlite: { + code: sqlitePaths.codePath, + prose: sqlitePaths.prosePath, + legacy: sqlitePaths.legacyPath, + legacyExists: sqlitePaths.legacyExists + } + }; +} + +/** + * Stat a path if it exists. + * @param {string} target + * @returns {{exists:boolean,mtime:(string|null),bytes:number}} + */ +function statIfExists(target) { + try { + const stat = fs.statSync(target); + return { + exists: true, + mtime: stat.mtime ? stat.mtime.toISOString() : null, + bytes: stat.size + }; + } catch { + return { exists: false, mtime: null, bytes: 0 }; + } +} + +/** + * Fetch lightweight git status info for a repo. + * @param {string} repoPath + * @returns {Promise} + */ +async function getGitInfo(repoPath) { + const gitDir = path.join(repoPath, '.git'); + const hasGitDir = fs.existsSync(gitDir); + if (!hasGitDir) { + return { + isRepo: false, + warning: 'Git repository not detected; using path-based repo identity.' + }; + } + try { + const git = simpleGit(repoPath); + const status = await git.status(); + const head = await git.revparse(['HEAD']); + return { + isRepo: true, + head: head.trim(), + branch: status.current || null, + isDirty: status.files.length > 0 + }; + } catch (error) { + return { + isRepo: true, + warning: `Git detected but status unavailable: ${error.message}` + }; + } +} + +/** + * Build an index status report for the MCP tool. 
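resolveToolTimeoutMs above resolves per-tool budgets in order: mcp.toolTimeouts[name], then mcp.toolTimeoutMs (or the PAIROFCLEATS_MCP_TOOL_TIMEOUT_MS env var), then the built-in per-tool and global defaults. A hypothetical .pairofcleats.json override (values illustrative):

{
  "mcp": {
    "queueMax": 32,
    "toolTimeoutMs": 60000,
    "toolTimeouts": { "build_index": 1800000 }
  }
}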
+ * @param {object} [args] + * @returns {Promise} + */ +export async function indexStatus(args = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const userConfig = loadUserConfig(repoPath); + const envConfig = getEnvConfig(); + const cacheRoot = (userConfig.cache && userConfig.cache.root) || envConfig.cacheRoot || getCacheRoot(); + const repoId = getRepoId(repoPath); + const repoCacheRoot = getRepoCacheRoot(repoPath, userConfig); + const dictConfig = getDictConfig(repoPath, userConfig); + const dictPaths = await getDictionaryPaths(repoPath, dictConfig); + const modelConfig = getModelConfig(repoPath, userConfig); + const modelsDir = modelConfig.dir; + const modelDirName = `models--${modelConfig.id.replace('/', '--')}`; + const modelPath = path.join(modelsDir, modelDirName); + + const artifacts = listArtifacts(repoPath, userConfig); + const git = await getGitInfo(repoPath); + const incrementalRoot = path.join(repoCacheRoot, 'incremental'); + const report = { + repoPath, + repoId, + cacheRoot, + repoCacheRoot, + git, + dictionaries: { + dir: dictConfig.dir, + files: dictPaths, + enabled: dictPaths.length > 0, + includeSlang: dictConfig.includeSlang + }, + models: { + dir: modelsDir, + model: modelConfig.id, + available: fs.existsSync(modelPath), + hint: fs.existsSync(modelPath) + ? null + : 'Run the download_models tool or `npm run download-models` to prefetch embeddings.' + }, + incremental: { + dir: incrementalRoot, + exists: fs.existsSync(incrementalRoot) + }, + index: { + code: { + dir: artifacts.index.code.dir, + chunkMeta: statIfExists(artifacts.index.code.chunkMeta), + tokenPostings: statIfExists(artifacts.index.code.tokenPostings) + }, + prose: { + dir: artifacts.index.prose.dir, + chunkMeta: statIfExists(artifacts.index.prose.chunkMeta), + tokenPostings: statIfExists(artifacts.index.prose.tokenPostings) + }, + records: { + dir: artifacts.index.records.dir, + chunkMeta: statIfExists(artifacts.index.records.chunkMeta), + tokenPostings: statIfExists(artifacts.index.records.tokenPostings) + } + }, + sqlite: { + code: { path: artifacts.sqlite.code, ...statIfExists(artifacts.sqlite.code) }, + prose: { path: artifacts.sqlite.prose, ...statIfExists(artifacts.sqlite.prose) }, + legacy: artifacts.sqlite.legacyExists ? artifacts.sqlite.legacy : null + }, + metrics: { + dir: artifacts.metrics.dir, + indexCode: statIfExists(artifacts.metrics.indexCode), + indexProse: statIfExists(artifacts.metrics.indexProse), + indexRecords: statIfExists(artifacts.metrics.indexRecords), + queryCache: statIfExists(artifacts.metrics.queryCache) + } + }; + + return report; +} + +/** + * Inspect configuration + cache status with warnings. 
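Clients reach this report through a normal MCP tools/call request; assuming the new transport keeps the same method names as the inline server it replaces, the framed body looks like:

{
  "jsonrpc": "2.0",
  "id": 7,
  "method": "tools/call",
  "params": { "name": "index_status", "arguments": { "repoPath": "." } }
}

The report comes back serialized as JSON text in result.content[0].text.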
+ * @param {object} [args] + * @returns {Promise} + */ +export async function configStatus(args = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const userConfig = loadUserConfig(repoPath); + const envConfig = getEnvConfig(); + const cacheRoot = (userConfig.cache && userConfig.cache.root) || envConfig.cacheRoot || getCacheRoot(); + const repoCacheRoot = getRepoCacheRoot(repoPath, userConfig); + const dictConfig = getDictConfig(repoPath, userConfig); + const dictionaryPaths = await getDictionaryPaths(repoPath, dictConfig); + const modelConfig = getModelConfig(repoPath, userConfig); + const modelsDir = modelConfig.dir; + const modelDirName = `models--${modelConfig.id.replace('/', '--')}`; + const modelPath = path.join(modelsDir, modelDirName); + const sqlitePaths = resolveSqlitePaths(repoPath, userConfig); + const sqliteConfigured = userConfig.sqlite?.use !== false; + const vectorConfig = getVectorExtensionConfig(repoPath, userConfig); + const vectorPath = resolveVectorExtensionPath(vectorConfig); + + const warnings = []; + if (!dictionaryPaths.length && (dictConfig.languages.length || dictConfig.files.length || dictConfig.includeSlang || dictConfig.enableRepoDictionary)) { + warnings.push({ + code: 'dictionary_missing', + message: 'No dictionary files found; identifier splitting will be limited.' + }); + } + if (!fs.existsSync(modelPath)) { + warnings.push({ + code: 'model_missing', + message: `Embedding model not found (${modelConfig.id}). Run npm run download-models.` + }); + } + if (sqliteConfigured) { + const missing = []; + if (!fs.existsSync(sqlitePaths.codePath)) missing.push(`code=${sqlitePaths.codePath}`); + if (!fs.existsSync(sqlitePaths.prosePath)) missing.push(`prose=${sqlitePaths.prosePath}`); + if (missing.length) { + warnings.push({ + code: 'sqlite_missing', + message: `SQLite indexes missing (${missing.join(', ')}). Run npm run build-sqlite-index.` + }); + } + } + if (vectorConfig.enabled) { + if (!vectorPath || !fs.existsSync(vectorPath)) { + warnings.push({ + code: 'extension_missing', + message: 'SQLite vector extension is enabled but not installed.' + }); + } + } + + return { + repoPath, + repoId: getRepoId(repoPath), + config: { + cacheRoot, + repoCacheRoot, + dictionary: dictConfig, + models: modelConfig, + sqlite: { + use: sqliteConfigured, + annMode: vectorConfig.annMode || null, + codeDbPath: sqlitePaths.codePath, + proseDbPath: sqlitePaths.prosePath + }, + search: userConfig.search || {}, + indexing: userConfig.indexing || {}, + tooling: userConfig.tooling || {} + }, + cache: { + cacheRootExists: fs.existsSync(cacheRoot), + repoCacheExists: fs.existsSync(repoCacheRoot), + dictionaries: dictionaryPaths, + modelAvailable: fs.existsSync(modelPath), + sqlite: { + codeExists: fs.existsSync(sqlitePaths.codePath), + proseExists: fs.existsSync(sqlitePaths.prosePath) + }, + vectorExtension: { + enabled: vectorConfig.enabled, + path: vectorPath, + available: !!(vectorPath && fs.existsSync(vectorPath)) + } + }, + warnings + }; +} diff --git a/tools/mcp/runner.js b/tools/mcp/runner.js new file mode 100644 index 000000000..1d6730cae --- /dev/null +++ b/tools/mcp/runner.js @@ -0,0 +1,181 @@ +import { execa, execaSync } from 'execa'; +import { ERROR_CODES } from '../../src/shared/error-codes.js'; +import { incTimeout } from '../../src/shared/metrics.js'; + +/** + * Run a node command and return stdout. 
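Because the warning codes above are stable strings, a caller can script remediation off configStatus directly; a sketch, assuming the download/build handlers exported from tools.js:

const { warnings } = await configStatus({ repoPath: '.' });
for (const warning of warnings) {
  if (warning.code === 'model_missing') await downloadModels({ repoPath: '.' });
  if (warning.code === 'sqlite_missing') await buildSqliteIndex({ repoPath: '.' });
}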
+ * @param {string} cwd + * @param {string[]} args + * @returns {string} + */ +export function runNodeSync(cwd, args) { + const result = execaSync(process.execPath, args, { + cwd, + encoding: 'utf8', + reject: false + }); + if (result.exitCode !== 0) { + const stderr = (result.stderr || '').trim(); + const stdout = (result.stdout || '').trim(); + const message = stderr || stdout || `Command failed: ${args.join(' ')}`; + const error = new Error(message.trim()); + error.code = result.exitCode; + error.stderr = stderr; + error.stdout = stdout; + throw error; + } + return result.stdout || ''; +} + +/** + * Build a line buffer for progress streaming. + * @param {(line:string)=>void} onLine + * @returns {{push:(text:string)=>void,flush:()=>void}} + */ +function createLineBuffer(onLine) { + let buffer = ''; + return { + push(text) { + buffer += text; + const lines = buffer.split(/\r?\n/); + buffer = lines.pop() || ''; + for (const line of lines) { + const trimmed = line.trim(); + if (trimmed) onLine(trimmed); + } + }, + flush() { + const trimmed = buffer.trim(); + if (trimmed) onLine(trimmed); + buffer = ''; + } + }; +} + +/** + * Run a node command asynchronously with optional stderr streaming. + * @param {string} cwd + * @param {string[]} args + * @param {{streamOutput?:boolean,onLine?:(payload:{stream:string,line:string})=>void,maxBufferBytes?:number}} [options] + * @returns {Promise<{stdout:string,stderr:string}>} + */ +export function runNodeAsync(cwd, args, options = {}) { + return new Promise((resolve, reject) => { + const child = execa(process.execPath, args, { + cwd, + reject: false, + stdio: ['ignore', 'pipe', 'pipe'] + }); + let stdout = ''; + let stderr = ''; + const streamOutput = options.streamOutput === true; + const onLine = typeof options.onLine === 'function' ? options.onLine : null; + const maxBufferBytes = Number.isFinite(Number(options.maxBufferBytes)) + ? Math.max(0, Number(options.maxBufferBytes)) + : 1024 * 1024; + const appendLimited = (current, text) => { + if (!maxBufferBytes) return current + text; + const combined = current + text; + if (combined.length <= maxBufferBytes) return combined; + return combined.slice(combined.length - maxBufferBytes); + }; + const stdoutBuffer = onLine + ? createLineBuffer((line) => onLine({ stream: 'stdout', line })) + : null; + const stderrBuffer = onLine + ? createLineBuffer((line) => onLine({ stream: 'stderr', line })) + : null; + child.stdout?.on('data', (chunk) => { + const text = chunk.toString(); + stdout = appendLimited(stdout, text); + if (streamOutput) process.stderr.write(text); + stdoutBuffer?.push(text); + }); + child.stderr?.on('data', (chunk) => { + const text = chunk.toString(); + stderr = appendLimited(stderr, text); + if (streamOutput) process.stderr.write(text); + stderrBuffer?.push(text); + }); + child + .then((result) => { + stdoutBuffer?.flush(); + stderrBuffer?.flush(); + if (result.exitCode === 0) { + resolve({ stdout, stderr }); + return; + } + const error = new Error(stderr.trim() || `Command failed: ${args.join(' ')}`); + error.code = result.exitCode; + error.stdout = stdout; + error.stderr = stderr; + reject(error); + }) + .catch((err) => { + const error = new Error(err?.shortMessage || err?.message || 'Command failed'); + error.code = err?.exitCode; + error.stdout = err?.stdout || stdout; + error.stderr = err?.stderr || stderr; + reject(error); + }); + }); +} + +/** + * Run a tool script with progress notifications. 
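createLineBuffer above re-chunks arbitrary stdio writes into whole lines, which is what keeps per-line progress events intact when a line arrives split across chunks:

const lines = [];
const buf = createLineBuffer((line) => lines.push(line));
buf.push('down');            // no newline yet: buffered
buf.push('loaded=2\nskip');  // completes one line, buffers the rest
buf.flush();                 // emits whatever remains
// lines => ['downloaded=2', 'skip']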
+ * @param {{repoPath:string,scriptArgs:string[],context?:object,startMessage?:string,doneMessage?:string}} input + * @returns {Promise} + */ +export async function runToolWithProgress({ repoPath, scriptArgs, context = {}, startMessage, doneMessage }) { + const progress = typeof context.progress === 'function' ? context.progress : null; + const progressLine = progress + ? ({ stream, line }) => progress({ message: line, stream }) + : null; + if (progress && startMessage) { + progress({ message: startMessage, phase: 'start' }); + } + const { stdout } = await runNodeAsync(repoPath, scriptArgs, { + streamOutput: true, + onLine: progressLine + }); + if (progress && doneMessage) { + progress({ message: doneMessage, phase: 'done' }); + } + return stdout || ''; +} + +export function parseCountSummary(stdout) { + const match = String(stdout || '').match(/downloaded=(\d+)\s+skipped=(\d+)/i); + if (!match) return null; + return { + downloaded: Number(match[1]), + skipped: Number(match[2]) + }; +} + +export function parseExtensionPath(stdout) { + const match = String(stdout || '').match(/Extension present at (.+)$/im); + return match ? match[1].trim() : null; +} + +export const withTimeout = async (promise, timeoutMs, { label, onTimeout } = {}) => { + if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) { + return await promise; + } + let timer = null; + const timeoutPromise = new Promise((_, reject) => { + timer = setTimeout(() => { + onTimeout?.(); + incTimeout({ surface: 'mcp', operation: 'tool' }); + const error = new Error(`Tool timeout after ${timeoutMs}ms (${label || 'tool'}).`); + error.code = ERROR_CODES.TOOL_TIMEOUT; + error.timeoutMs = timeoutMs; + reject(error); + }, timeoutMs); + }); + try { + return await Promise.race([promise, timeoutPromise]); + } finally { + if (timer) clearTimeout(timer); + } +}; diff --git a/tools/mcp/tools.js b/tools/mcp/tools.js new file mode 100644 index 000000000..c107e36d2 --- /dev/null +++ b/tools/mcp/tools.js @@ -0,0 +1,676 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { + DEFAULT_MODEL_ID, + getModelConfig, + loadUserConfig, + resolveToolRoot +} from '../dict-utils.js'; +import { buildIndex as coreBuildIndex, buildSqliteIndex as coreBuildSqliteIndex, search as coreSearch, status as coreStatus } from '../../src/integrations/core/index.js'; +import { clearRepoCaches, configStatus, getRepoCaches, indexStatus, resolveRepoPath } from './repo.js'; +import { parseCountSummary, parseExtensionPath, runNodeAsync, runNodeSync, runToolWithProgress } from './runner.js'; + +const toolRoot = resolveToolRoot(); + +/** + * Normalize meta filters into CLI-friendly key/value strings. + * @param {any} meta + * @returns {string[]|null} + */ +function normalizeMetaFilters(meta) { + if (!meta) return null; + if (Array.isArray(meta)) { + const entries = meta.flatMap((entry) => { + if (entry == null) return []; + if (typeof entry === 'string') return [entry]; + if (typeof entry === 'object') { + return Object.entries(entry).map(([key, value]) => + value == null || value === '' ? String(key) : `${key}=${value}` + ); + } + return [String(entry)]; + }); + return entries.length ? entries : null; + } + if (typeof meta === 'object') { + const entries = Object.entries(meta).map(([key, value]) => + value == null || value === '' ? String(key) : `${key}=${value}` + ); + return entries.length ? entries : null; + } + return [String(meta)]; +} + +/** + * Restore CI artifacts if present. 
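normalizeMetaFilters accepts strings, arrays, or plain objects and flattens them into the key=value form the downstream CLI flags expect:

normalizeMetaFilters({ severity: 'high', cve: null });
// => ['severity=high', 'cve']
normalizeMetaFilters(['env=prod', { team: 'infra' }]);
// => ['env=prod', 'team=infra']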
+ * @param {string} repoPath + * @param {string} artifactsDir + * @returns {boolean} + */ +function maybeRestoreArtifacts(repoPath, artifactsDir, progress) { + const fromDir = artifactsDir ? path.resolve(artifactsDir) : path.join(repoPath, 'ci-artifacts'); + if (!fs.existsSync(path.join(fromDir, 'manifest.json'))) return false; + if (progress) { + progress({ + message: `Restoring CI artifacts from ${fromDir}`, + phase: 'start' + }); + } + runNodeSync(repoPath, [path.join(toolRoot, 'tools', 'ci-restore-artifacts.js'), '--repo', repoPath, '--from', fromDir]); + if (progress) { + progress({ + message: 'CI artifacts restored.', + phase: 'done' + }); + } + return true; +} + +/** + * Handle the MCP build_index tool call. + * @param {object} [args] + * @returns {object} + */ +export async function buildIndex(args = {}, context = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const userConfig = loadUserConfig(repoPath); + const sqliteConfigured = userConfig.sqlite?.use !== false; + const shouldUseSqlite = typeof args.sqlite === 'boolean' ? args.sqlite : sqliteConfigured; + const mode = args.mode || 'all'; + const incremental = args.incremental === true; + const stubEmbeddings = args.stubEmbeddings === true; + const buildSqlite = shouldUseSqlite && mode !== 'records'; + const useArtifacts = args.useArtifacts === true; + const progress = typeof context.progress === 'function' ? context.progress : null; + + let restoredArtifacts = false; + if (useArtifacts) { + restoredArtifacts = maybeRestoreArtifacts(repoPath, args.artifactsDir, progress); + } + + if (!restoredArtifacts) { + if (progress) { + progress({ + message: `Building ${mode} index${incremental ? ' (incremental)' : ''}.`, + phase: 'start' + }); + } + await coreBuildIndex(repoPath, { + mode, + incremental, + stubEmbeddings, + sqlite: buildSqlite, + emitOutput: true + }); + } + + if (buildSqlite) { + if (progress) { + progress({ + message: `Building SQLite index${incremental ? ' (incremental)' : ''}.`, + phase: 'start' + }); + } + await coreBuildSqliteIndex(repoPath, { + incremental, + emitOutput: true + }); + } + if (progress) { + progress({ + message: 'Index build complete.', + phase: 'done' + }); + } + clearRepoCaches(repoPath); + + return { + repoPath, + mode, + sqlite: buildSqlite, + incremental, + restoredArtifacts + }; +} + +/** + * Handle the MCP search tool call. + * @param {object} [args] + * @returns {object} + */ +export async function runSearch(args = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const query = String(args.query || '').trim(); + if (!query) throw new Error('Query is required.'); + + const mode = args.mode || 'both'; + const backend = args.backend || null; + const output = typeof args.output === 'string' ? args.output.toLowerCase() : ''; + const ann = typeof args.ann === 'boolean' ? args.ann : null; + const top = Number.isFinite(Number(args.top)) ? Math.max(1, Number(args.top)) : null; + const context = Number.isFinite(Number(args.context)) ? Math.max(0, Number(args.context)) : null; + const typeFilter = args.type ? String(args.type) : null; + const authorFilter = args.author ? String(args.author) : null; + const importFilter = args.import ? String(args.import) : null; + const callsFilter = args.calls ? String(args.calls) : null; + const usesFilter = args.uses ? String(args.uses) : null; + const signatureFilter = args.signature ? String(args.signature) : null; + const paramFilter = args.param ? String(args.param) : null; + const decoratorFilter = args.decorator ? 
String(args.decorator) : null; + const inferredTypeFilter = args.inferredType ? String(args.inferredType) : null; + const returnTypeFilter = args.returnType ? String(args.returnType) : null; + const throwsFilter = args.throws ? String(args.throws) : null; + const readsFilter = args.reads ? String(args.reads) : null; + const writesFilter = args.writes ? String(args.writes) : null; + const mutatesFilter = args.mutates ? String(args.mutates) : null; + const aliasFilter = args.alias ? String(args.alias) : null; + const awaitsFilter = args.awaits ? String(args.awaits) : null; + const riskFilter = args.risk ? String(args.risk) : null; + const riskTagFilter = args.riskTag ? String(args.riskTag) : null; + const riskSourceFilter = args.riskSource ? String(args.riskSource) : null; + const riskSinkFilter = args.riskSink ? String(args.riskSink) : null; + const riskCategoryFilter = args.riskCategory ? String(args.riskCategory) : null; + const riskFlowFilter = args.riskFlow ? String(args.riskFlow) : null; + const branchesMin = Number.isFinite(Number(args.branchesMin)) ? Number(args.branchesMin) : null; + const loopsMin = Number.isFinite(Number(args.loopsMin)) ? Number(args.loopsMin) : null; + const breaksMin = Number.isFinite(Number(args.breaksMin)) ? Number(args.breaksMin) : null; + const continuesMin = Number.isFinite(Number(args.continuesMin)) ? Number(args.continuesMin) : null; + const churnMin = Number.isFinite(Number(args.churnMin)) ? Number(args.churnMin) : null; + const chunkAuthorFilter = args.chunkAuthor ? String(args.chunkAuthor) : null; + const modifiedAfter = args.modifiedAfter ? String(args.modifiedAfter) : null; + const modifiedSince = Number.isFinite(Number(args.modifiedSince)) ? Number(args.modifiedSince) : null; + const visibilityFilter = args.visibility ? String(args.visibility) : null; + const extendsFilter = args.extends ? String(args.extends) : null; + const lintFilter = args.lint === true; + const asyncFilter = args.async === true; + const generatorFilter = args.generator === true; + const returnsFilter = args.returns === true; + const branchFilter = args.branch ? String(args.branch) : null; + const langFilter = args.lang ? String(args.lang) : null; + const caseAll = args.case === true; + const caseFile = args.caseFile === true || caseAll; + const caseTokens = args.caseTokens === true || caseAll; + const fileFilters = []; + const toList = (value) => (Array.isArray(value) ? value : (value == null ? [] : [value])); + fileFilters.push(...toList(args.path)); + fileFilters.push(...toList(args.file)); + const extFilters = toList(args.ext); + const metaFilters = normalizeMetaFilters(args.meta); + const metaJson = args.metaJson || null; + + const useCompact = output !== 'full' && output !== 'json'; + const searchArgs = [useCompact ? 
'--json-compact' : '--json', '--repo', repoPath]; + if (mode && mode !== 'both') searchArgs.push('--mode', mode); + if (backend) searchArgs.push('--backend', backend); + if (ann === true) searchArgs.push('--ann'); + if (ann === false) searchArgs.push('--no-ann'); + if (top) searchArgs.push('-n', String(top)); + if (context !== null) searchArgs.push('--context', String(context)); + if (typeFilter) searchArgs.push('--type', typeFilter); + if (authorFilter) searchArgs.push('--author', authorFilter); + if (importFilter) searchArgs.push('--import', importFilter); + if (callsFilter) searchArgs.push('--calls', callsFilter); + if (usesFilter) searchArgs.push('--uses', usesFilter); + if (signatureFilter) searchArgs.push('--signature', signatureFilter); + if (paramFilter) searchArgs.push('--param', paramFilter); + if (decoratorFilter) searchArgs.push('--decorator', decoratorFilter); + if (inferredTypeFilter) searchArgs.push('--inferred-type', inferredTypeFilter); + if (returnTypeFilter) searchArgs.push('--return-type', returnTypeFilter); + if (throwsFilter) searchArgs.push('--throws', throwsFilter); + if (readsFilter) searchArgs.push('--reads', readsFilter); + if (writesFilter) searchArgs.push('--writes', writesFilter); + if (mutatesFilter) searchArgs.push('--mutates', mutatesFilter); + if (aliasFilter) searchArgs.push('--alias', aliasFilter); + if (awaitsFilter) searchArgs.push('--awaits', awaitsFilter); + if (riskFilter) searchArgs.push('--risk', riskFilter); + if (riskTagFilter) searchArgs.push('--risk-tag', riskTagFilter); + if (riskSourceFilter) searchArgs.push('--risk-source', riskSourceFilter); + if (riskSinkFilter) searchArgs.push('--risk-sink', riskSinkFilter); + if (riskCategoryFilter) searchArgs.push('--risk-category', riskCategoryFilter); + if (riskFlowFilter) searchArgs.push('--risk-flow', riskFlowFilter); + if (branchesMin !== null) searchArgs.push('--branches', String(branchesMin)); + if (loopsMin !== null) searchArgs.push('--loops', String(loopsMin)); + if (breaksMin !== null) searchArgs.push('--breaks', String(breaksMin)); + if (continuesMin !== null) searchArgs.push('--continues', String(continuesMin)); + if (churnMin !== null) searchArgs.push('--churn', String(churnMin)); + if (chunkAuthorFilter) searchArgs.push('--chunk-author', chunkAuthorFilter); + if (modifiedAfter) searchArgs.push('--modified-after', modifiedAfter); + if (modifiedSince !== null) searchArgs.push('--modified-since', String(modifiedSince)); + if (visibilityFilter) searchArgs.push('--visibility', visibilityFilter); + if (extendsFilter) searchArgs.push('--extends', extendsFilter); + if (lintFilter) searchArgs.push('--lint'); + if (asyncFilter) searchArgs.push('--async'); + if (generatorFilter) searchArgs.push('--generator'); + if (returnsFilter) searchArgs.push('--returns'); + if (branchFilter) searchArgs.push('--branch', branchFilter); + if (langFilter) searchArgs.push('--lang', langFilter); + if (caseAll) searchArgs.push('--case'); + if (!caseAll && caseFile) searchArgs.push('--case-file'); + if (!caseAll && caseTokens) searchArgs.push('--case-tokens'); + for (const entry of fileFilters) { + if (entry == null || entry === '') continue; + searchArgs.push('--path', String(entry)); + } + for (const entry of extFilters) { + if (entry == null || entry === '') continue; + searchArgs.push('--ext', String(entry)); + } + if (Array.isArray(metaFilters)) { + metaFilters.forEach((entry) => searchArgs.push('--meta', entry)); + } + if (metaJson) { + const jsonValue = typeof metaJson === 'string' ? 
metaJson : JSON.stringify(metaJson); + searchArgs.push('--meta-json', jsonValue); + } + + const caches = getRepoCaches(repoPath); + return await coreSearch(repoPath, { + args: searchArgs, + query, + emitOutput: false, + exitOnError: false, + indexCache: caches.indexCache, + sqliteCache: caches.sqliteCache + }); +} + +/** + * Handle the MCP download_models tool call. + * @param {object} [args] + * @returns {{model:string,output:string}} + */ +export async function downloadModels(args = {}, context = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const userConfig = loadUserConfig(repoPath); + const modelConfig = getModelConfig(repoPath, userConfig); + const model = args.model || modelConfig.id || DEFAULT_MODEL_ID; + const scriptArgs = [path.join(toolRoot, 'tools', 'download-models.js'), '--model', model, '--repo', repoPath]; + if (args.cacheDir) scriptArgs.push('--cache-dir', args.cacheDir); + const progress = typeof context.progress === 'function' ? context.progress : null; + const progressLine = progress + ? ({ stream, line }) => progress({ message: line, stream }) + : null; + if (progress) { + progress({ message: `Downloading model ${model}.`, phase: 'start' }); + } + const { stdout } = await runNodeAsync(repoPath, scriptArgs, { + streamOutput: true, + onLine: progressLine + }); + if (progress) { + progress({ message: `Model download complete (${model}).`, phase: 'done' }); + } + return { model, output: stdout.trim() }; +} + +/** + * Handle the MCP download_dictionaries tool call. + * @param {object} [args] + * @returns {Promise} + */ +export async function downloadDictionaries(args = {}, context = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const scriptArgs = [path.join(toolRoot, 'tools', 'download-dicts.js'), '--repo', repoPath]; + if (args.lang) scriptArgs.push('--lang', String(args.lang)); + const urls = Array.isArray(args.url) ? args.url : (args.url ? [args.url] : []); + urls.forEach((value) => scriptArgs.push('--url', String(value))); + if (args.dir) scriptArgs.push('--dir', String(args.dir)); + if (args.update === true) scriptArgs.push('--update'); + if (args.force === true) scriptArgs.push('--force'); + const stdout = await runToolWithProgress({ + repoPath, + scriptArgs, + context, + startMessage: 'Downloading dictionaries.', + doneMessage: 'Dictionary download complete.' + }); + const summary = parseCountSummary(stdout); + return { + repoPath, + output: stdout.trim(), + ...(summary || {}) + }; +} + +/** + * Handle the MCP download_extensions tool call. + * @param {object} [args] + * @returns {Promise} + */ +export async function downloadExtensions(args = {}, context = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const scriptArgs = [path.join(toolRoot, 'tools', 'download-extensions.js'), '--repo', repoPath]; + if (args.provider) scriptArgs.push('--provider', String(args.provider)); + if (args.dir) scriptArgs.push('--dir', String(args.dir)); + if (args.out) scriptArgs.push('--out', String(args.out)); + if (args.platform) scriptArgs.push('--platform', String(args.platform)); + if (args.arch) scriptArgs.push('--arch', String(args.arch)); + const urls = Array.isArray(args.url) ? args.url : (args.url ? 
[args.url] : []); + urls.forEach((value) => scriptArgs.push('--url', String(value))); + if (args.update === true) scriptArgs.push('--update'); + if (args.force === true) scriptArgs.push('--force'); + const stdout = await runToolWithProgress({ + repoPath, + scriptArgs, + context, + startMessage: 'Downloading extensions.', + doneMessage: 'Extension download complete.' + }); + const summary = parseCountSummary(stdout); + const resolvedPath = parseExtensionPath(stdout); + return { + repoPath, + output: stdout.trim(), + extensionPath: resolvedPath, + ...(summary || {}) + }; +} + +/** + * Handle the MCP verify_extensions tool call. + * @param {object} [args] + * @returns {object} + */ +export function verifyExtensions(args = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const scriptArgs = [path.join(toolRoot, 'tools', 'verify-extensions.js'), '--json', '--repo', repoPath]; + if (args.provider) scriptArgs.push('--provider', String(args.provider)); + if (args.dir) scriptArgs.push('--dir', String(args.dir)); + if (args.path) scriptArgs.push('--path', String(args.path)); + if (args.platform) scriptArgs.push('--platform', String(args.platform)); + if (args.arch) scriptArgs.push('--arch', String(args.arch)); + if (args.module) scriptArgs.push('--module', String(args.module)); + if (args.table) scriptArgs.push('--table', String(args.table)); + if (args.column) scriptArgs.push('--column', String(args.column)); + if (args.encoding) scriptArgs.push('--encoding', String(args.encoding)); + if (args.options) scriptArgs.push('--options', String(args.options)); + if (args.annMode) scriptArgs.push('--ann-mode', String(args.annMode)); + if (args.load === false) scriptArgs.push('--no-load'); + const stdout = runNodeSync(repoPath, scriptArgs); + try { + return JSON.parse(stdout || '{}'); + } catch { + return { repoPath, output: stdout.trim() }; + } +} + +/** + * Handle the MCP build_sqlite_index tool call. + * @param {object} [args] + * @returns {Promise} + */ +export async function buildSqliteIndex(args = {}, context = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const progress = typeof context.progress === 'function' ? context.progress : null; + if (progress) { + progress({ message: 'Building SQLite index.', phase: 'start' }); + } + const payload = await coreBuildSqliteIndex(repoPath, { + mode: args.mode, + incremental: args.incremental === true, + compact: args.compact === true, + codeDir: args.codeDir, + proseDir: args.proseDir, + out: args.out, + emitOutput: true, + exitOnError: false + }); + clearRepoCaches(repoPath); + if (progress) { + progress({ message: 'SQLite index build complete.', phase: 'done' }); + } + return payload; +} + +/** + * Handle the MCP compact_sqlite_index tool call. + * @param {object} [args] + * @returns {Promise} + */ +export async function compactSqliteIndex(args = {}, context = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const scriptArgs = [path.join(toolRoot, 'tools', 'compact-sqlite-index.js'), '--repo', repoPath]; + if (args.mode) scriptArgs.push('--mode', String(args.mode)); + if (args.dryRun === true) scriptArgs.push('--dry-run'); + if (args.keepBackup === true) scriptArgs.push('--keep-backup'); + const stdout = await runToolWithProgress({ + repoPath, + scriptArgs, + context, + startMessage: 'Compacting SQLite index.', + doneMessage: 'SQLite compaction complete.' + }); + return { repoPath, output: stdout.trim() }; +} + +/** + * Handle the MCP cache_gc tool call. 
+ * @param {object} [args] + * @returns {object} + */ +export function cacheGc(args = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const scriptArgs = [path.join(toolRoot, 'tools', 'cache-gc.js'), '--json', '--repo', repoPath]; + if (args.dryRun === true) scriptArgs.push('--dry-run'); + if (Number.isFinite(Number(args.maxBytes))) scriptArgs.push('--max-bytes', String(args.maxBytes)); + if (Number.isFinite(Number(args.maxGb))) scriptArgs.push('--max-gb', String(args.maxGb)); + if (Number.isFinite(Number(args.maxAgeDays))) scriptArgs.push('--max-age-days', String(args.maxAgeDays)); + const stdout = runNodeSync(repoPath, scriptArgs); + try { + return JSON.parse(stdout || '{}'); + } catch { + return { repoPath, output: stdout.trim() }; + } +} + +/** + * Handle the MCP clean_artifacts tool call. + * @param {object} [args] + * @returns {Promise} + */ +export async function cleanArtifacts(args = {}, context = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const scriptArgs = [path.join(toolRoot, 'tools', 'clean-artifacts.js'), '--repo', repoPath]; + if (args.all === true) scriptArgs.push('--all'); + if (args.dryRun === true) scriptArgs.push('--dry-run'); + const stdout = await runToolWithProgress({ + repoPath, + scriptArgs, + context, + startMessage: 'Cleaning artifacts.', + doneMessage: 'Artifact cleanup complete.' + }); + return { repoPath, output: stdout.trim() }; +} + +/** + * Handle the MCP bootstrap tool call. + * @param {object} [args] + * @returns {Promise} + */ +export async function runBootstrap(args = {}, context = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const scriptArgs = [path.join(toolRoot, 'tools', 'bootstrap.js'), '--repo', repoPath]; + if (args.skipInstall === true) scriptArgs.push('--skip-install'); + if (args.skipDicts === true) scriptArgs.push('--skip-dicts'); + if (args.skipIndex === true) scriptArgs.push('--skip-index'); + if (args.skipArtifacts === true) scriptArgs.push('--skip-artifacts'); + if (args.skipTooling === true) scriptArgs.push('--skip-tooling'); + if (args.withSqlite === true) scriptArgs.push('--with-sqlite'); + if (args.incremental === true) scriptArgs.push('--incremental'); + const stdout = await runToolWithProgress({ + repoPath, + scriptArgs, + context, + startMessage: 'Bootstrapping repo.', + doneMessage: 'Bootstrap complete.' + }); + return { repoPath, output: stdout.trim() }; +} + +/** + * Handle the MCP report_artifacts tool call. + * @param {object} [args] + * @returns {object} + */ +export async function reportArtifacts(args = {}) { + const repoPath = resolveRepoPath(args.repoPath); + return coreStatus(repoPath); +} + +/** + * Handle the MCP triage_ingest tool call. + * @param {object} [args] + * @returns {Promise} + */ +export async function triageIngest(args = {}, context = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const source = String(args.source || '').trim(); + const inputPath = String(args.inputPath || '').trim(); + if (!source || !inputPath) { + throw new Error('source and inputPath are required.'); + } + const resolvedInput = path.isAbsolute(inputPath) ? inputPath : path.join(repoPath, inputPath); + const metaFilters = normalizeMetaFilters(args.meta); + const ingestArgs = [path.join(toolRoot, 'tools', 'triage', 'ingest.js'), '--source', source, '--in', resolvedInput]; + ingestArgs.push('--repo', repoPath); + if (Array.isArray(metaFilters)) { + metaFilters.forEach((entry) => ingestArgs.push('--meta', entry)); + } + const progress = typeof context.progress === 'function' ? 
context.progress : null; + const progressLine = progress + ? ({ stream, line }) => progress({ message: line, stream }) + : null; + if (progress) { + progress({ message: `Ingesting ${source} findings.`, phase: 'start' }); + } + const { stdout } = await runNodeAsync(repoPath, ingestArgs, { streamOutput: true, onLine: progressLine }); + let payload = {}; + try { + payload = JSON.parse(stdout || '{}'); + } catch (error) { + throw new Error(`Failed to parse ingest output: ${error?.message || error}`); + } + if (args.buildIndex) { + await buildIndex({ + repoPath, + mode: 'records', + incremental: args.incremental === true, + stubEmbeddings: args.stubEmbeddings === true, + sqlite: false + }, context); + } + if (progress) { + progress({ message: 'Triage ingest complete.', phase: 'done' }); + } + return payload; +} + +/** + * Handle the MCP triage_decision tool call. + * @param {object} [args] + * @returns {object} + */ +export function triageDecision(args = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const finding = String(args.finding || '').trim(); + const status = String(args.status || '').trim(); + if (!finding || !status) { + throw new Error('finding and status are required.'); + } + const metaFilters = normalizeMetaFilters(args.meta); + const decisionArgs = [path.join(toolRoot, 'tools', 'triage', 'decision.js'), '--finding', finding, '--status', status]; + decisionArgs.push('--repo', repoPath); + if (args.justification) decisionArgs.push('--justification', String(args.justification)); + if (args.reviewer) decisionArgs.push('--reviewer', String(args.reviewer)); + if (args.expires) decisionArgs.push('--expires', String(args.expires)); + if (Array.isArray(metaFilters)) { + metaFilters.forEach((entry) => decisionArgs.push('--meta', entry)); + } + const codes = Array.isArray(args.codes) ? args.codes : (args.codes ? [args.codes] : []); + const evidence = Array.isArray(args.evidence) ? args.evidence : (args.evidence ? [args.evidence] : []); + codes.filter(Boolean).forEach((code) => decisionArgs.push('--code', String(code))); + evidence.filter(Boolean).forEach((item) => decisionArgs.push('--evidence', String(item))); + const stdout = runNodeSync(repoPath, decisionArgs); + return JSON.parse(stdout || '{}'); +} + +/** + * Handle the MCP triage_context_pack tool call. + * @param {object} [args] + * @returns {Promise} + */ +export async function triageContextPack(args = {}, context = {}) { + const repoPath = resolveRepoPath(args.repoPath); + const recordId = String(args.recordId || '').trim(); + if (!recordId) throw new Error('recordId is required.'); + const contextArgs = [path.join(toolRoot, 'tools', 'triage', 'context-pack.js'), '--record', recordId]; + contextArgs.push('--repo', repoPath); + if (args.outPath) contextArgs.push('--out', String(args.outPath)); + if (args.ann === true) contextArgs.push('--ann'); + if (args.ann === false) contextArgs.push('--no-ann'); + if (args.stubEmbeddings === true) contextArgs.push('--stub-embeddings'); + const progress = typeof context.progress === 'function' ? context.progress : null; + const progressLine = progress + ? 
({ stream, line }) => progress({ message: line, stream }) + : null; + if (progress) { + progress({ message: 'Building triage context pack.', phase: 'start' }); + } + const { stdout } = await runNodeAsync(repoPath, contextArgs, { streamOutput: true, onLine: progressLine }); + if (progress) { + progress({ message: 'Context pack ready.', phase: 'done' }); + } + try { + return JSON.parse(stdout || '{}'); + } catch (error) { + throw new Error(`Failed to parse context pack output: ${error?.message || error}`); + } +} + +/** + * Dispatch an MCP tool call by name. + * @param {string} name + * @param {object} args + * @returns {Promise} + */ +export async function handleToolCall(name, args, context = {}) { + switch (name) { + case 'index_status': + return await indexStatus(args); + case 'config_status': + return await configStatus(args); + case 'build_index': + return await buildIndex(args, context); + case 'search': + return await runSearch(args); + case 'download_models': + return await downloadModels(args, context); + case 'download_dictionaries': + return await downloadDictionaries(args, context); + case 'download_extensions': + return await downloadExtensions(args, context); + case 'verify_extensions': + return verifyExtensions(args); + case 'build_sqlite_index': + return await buildSqliteIndex(args, context); + case 'compact_sqlite_index': + return await compactSqliteIndex(args, context); + case 'cache_gc': + return cacheGc(args); + case 'clean_artifacts': + return await cleanArtifacts(args, context); + case 'bootstrap': + return await runBootstrap(args, context); + case 'report_artifacts': + return await reportArtifacts(args); + case 'triage_ingest': + return await triageIngest(args, context); + case 'triage_decision': + return triageDecision(args); + case 'triage_context_pack': + return await triageContextPack(args, context); + default: + throw new Error(`Unknown tool: ${name}`); + } +} diff --git a/tools/mcp/transport.js b/tools/mcp/transport.js new file mode 100644 index 000000000..f9ce844aa --- /dev/null +++ b/tools/mcp/transport.js @@ -0,0 +1,216 @@ +import { StreamMessageReader } from 'vscode-jsonrpc'; +import { closeOutput, sendError, sendNotification, sendResult } from '../../src/integrations/mcp/protocol.js'; +import { ERROR_CODES } from '../../src/shared/error-codes.js'; +import { logError } from '../../src/shared/progress.js'; +import { withTimeout } from './runner.js'; + +/** + * Map common tool failure output to a remediation hint. 
+ * @param {any} error + * @returns {{message:string,code?:number,stderr?:string,stdout?:string}} + */ +function getRemediationHint(error) { + const parts = [error?.message, error?.stderr, error?.stdout] + .filter(Boolean) + .join('\n') + .toLowerCase(); + if (!parts) return null; + + if (parts.includes('sqlite backend requested but index not found') + || parts.includes('missing required tables')) { + return 'Run `npm run build-sqlite-index` or set sqlite.use=false / --backend memory.'; + } + if (parts.includes('better-sqlite3 is required')) { + return 'Run `npm install` and ensure better-sqlite3 can load on this platform.'; + } + if (parts.includes('chunk_meta.json') + || parts.includes('minhash_signatures') + || parts.includes('index not found') + || parts.includes('build-index') + || parts.includes('build index')) { + return 'Run `npm run build-index` (or `npm run setup`/`npm run bootstrap`) to generate indexes.'; + } + if ((parts.includes('model') || parts.includes('xenova') || parts.includes('transformers')) + && (parts.includes('not found') || parts.includes('failed') || parts.includes('fetch') || parts.includes('download') || parts.includes('enoent'))) { + return 'Run `npm run download-models` or use `--stub-embeddings` / `PAIROFCLEATS_EMBEDDINGS=stub`.'; + } + if (parts.includes('dictionary') + || parts.includes('wordlist') + || parts.includes('words_alpha') + || parts.includes('download-dicts')) { + return 'Run `npm run download-dicts -- --lang en` (or configure dictionary.files/languages).'; + } + return null; +} + +/** + * Format error payloads for tool responses. + * @param {any} error + * @returns {{message:string,code?:number,stderr?:string,stdout?:string,hint?:string}} + */ +function formatToolError(error) { + const payload = { + message: error?.message || String(error) + }; + if (error?.code !== undefined) payload.code = error.code; + if (error?.stderr) payload.stderr = String(error.stderr).trim(); + if (error?.stdout) payload.stdout = String(error.stdout).trim(); + if (error?.timeoutMs) payload.timeoutMs = error.timeoutMs; + const hint = getRemediationHint(error); + if (hint) payload.hint = hint; + return payload; +} + +/** + * Emit a progress notification for long-running tools. + * @param {string|number|null} id + * @param {string} tool + * @param {{message:string,stream?:string,phase?:string}} payload + */ +function sendProgress(id, tool, payload) { + if (id === null || id === undefined) return; + const message = payload?.message ? String(payload.message) : ''; + if (!message) return; + sendNotification('notifications/progress', { + id, + tool, + message, + stream: payload?.stream || 'info', + phase: payload?.phase || 'progress', + ts: new Date().toISOString() + }); +} + +/** + * Start the MCP stdio transport. + * @param {{toolDefs:any,serverInfo:{name:string,version:string},handleToolCall:Function,resolveToolTimeoutMs:Function,queueMax:number}} config + */ +export const createMcpTransport = ({ toolDefs, serverInfo, handleToolCall, resolveToolTimeoutMs, queueMax }) => { + let processing = false; + const queue = []; + + /** + * Handle a JSON-RPC message from stdin. 
+ * @param {object} message + * @returns {Promise} + */ + async function handleMessage(message) { + if (!message || message.jsonrpc !== '2.0') return; + const { id, method, params } = message; + + if (method === 'initialize') { + sendResult(id, { + protocolVersion: '2024-11-05', + serverInfo, + capabilities: { + tools: { listChanged: false }, + resources: { listChanged: false } + } + }); + return; + } + + if (method === 'shutdown') { + sendResult(id, {}); + return; + } + + if (method === 'exit') { + process.exit(0); + } + + if (method === 'tools/list') { + sendResult(id, { tools: toolDefs }); + return; + } + + if (method === 'resources/list') { + sendResult(id, { resources: [] }); + return; + } + + if (method === 'tools/call') { + if (!id) return; + const name = params?.name; + const args = params?.arguments || {}; + const timeoutMs = resolveToolTimeoutMs(name, args); + try { + let timedOut = false; + const progress = (payload) => { + if (timedOut) return; + sendProgress(id, name, payload); + }; + const result = await withTimeout( + handleToolCall(name, args, { progress, toolCallId: id }), + timeoutMs, + { label: name, onTimeout: () => { timedOut = true; } } + ); + sendResult(id, { + content: [{ type: 'text', text: JSON.stringify(result, null, 2) }] + }); + } catch (error) { + const payload = formatToolError(error); + if (error?.code === 'TOOL_TIMEOUT' && timeoutMs) { + payload.timeoutMs = timeoutMs; + } + sendResult(id, { + content: [{ type: 'text', text: JSON.stringify(payload, null, 2) }], + isError: true + }); + } + return; + } + + if (id) { + sendError(id, -32601, `Method not found: ${method}`); + } + } + + /** + * Process queued messages serially. + */ + function processQueue() { + if (processing) return; + processing = true; + const run = async () => { + while (queue.length) { + const msg = queue.shift(); + await handleMessage(msg); + } + processing = false; + }; + run().catch((error) => { + processing = false; + logError('[mcp] queue error', { error: error?.message || String(error) }); + }); + } + + /** + * Enqueue a message for processing. + * @param {object} message + */ + function enqueueMessage(message) { + const inFlight = processing ? 1 : 0; + if (queue.length + inFlight >= queueMax) { + if (message?.id !== undefined && message?.id !== null) { + sendError(message.id, -32001, 'Server overloaded.', undefined, { code: ERROR_CODES.QUEUE_OVERLOADED }); + } + return; + } + queue.push(message); + processQueue(); + } + + const start = () => { + const reader = new StreamMessageReader(process.stdin); + reader.onError((err) => logError('[mcp] stream error', { error: err?.message || String(err) })); + reader.onClose(() => { + closeOutput(); + process.exit(0); + }); + reader.listen(enqueueMessage); + return reader; + }; + + return { start }; +}; diff --git a/tools/merge-history.sh b/tools/merge-history.sh index 1fe08279a..4ee250c1c 100644 --- a/tools/merge-history.sh +++ b/tools/merge-history.sh @@ -15,6 +15,6 @@ if [ ! -f "$THEIRS_FILE" ]; then exit 0 fi -node "$(dirname "$0")/mergeSearchHistory.js" "$THEIRS_FILE" "$OURS_FILE" +node "$(dirname "$0")/mergeAppendOnly.js" "$THEIRS_FILE" "$OURS_FILE" exit 0 diff --git a/tools/merge-no-results.sh b/tools/merge-no-results.sh index fb2f84165..4ee250c1c 100644 --- a/tools/merge-no-results.sh +++ b/tools/merge-no-results.sh @@ -15,6 +15,6 @@ if [ ! 
-f "$THEIRS_FILE" ]; then exit 0 fi -node "$(dirname "$0")/mergeNoResultQueries.js" "$THEIRS_FILE" "$OURS_FILE" +node "$(dirname "$0")/mergeAppendOnly.js" "$THEIRS_FILE" "$OURS_FILE" exit 0 diff --git a/tools/mergeNoResultQueries.js b/tools/mergeNoResultQueries.js deleted file mode 100644 index 8849a86bd..000000000 --- a/tools/mergeNoResultQueries.js +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env node -import { mergeAppendOnly } from './mergeAppendOnly.js'; - -const [baseFile, targetFile] = process.argv.slice(2); -if (!baseFile || !targetFile) { - console.error('usage: mergeNoResultQueries.js '); - process.exit(1); -} - -await mergeAppendOnly(baseFile, targetFile); diff --git a/tools/mergeSearchHistory.js b/tools/mergeSearchHistory.js deleted file mode 100644 index 2ca1eccd9..000000000 --- a/tools/mergeSearchHistory.js +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env node -import { mergeAppendOnly } from './mergeAppendOnly.js'; - -const [baseFile, targetFile] = process.argv.slice(2); -if (!baseFile || !targetFile) { - console.error('usage: mergeSearchHistory.js '); - process.exit(1); -} - -await mergeAppendOnly(baseFile, targetFile); diff --git a/tools/parity-matrix.js b/tools/parity-matrix.js new file mode 100644 index 000000000..33bacb168 --- /dev/null +++ b/tools/parity-matrix.js @@ -0,0 +1,277 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import { execa } from 'execa'; +import { createCli } from '../src/shared/cli.js'; +import { resolveToolRoot } from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'parity-matrix', + options: { + backend: { type: 'string' }, + backends: { type: 'string' }, + 'ann-modes': { type: 'string' }, + queries: { type: 'string' }, + 'queries-dir': { type: 'string' }, + top: { type: 'number' }, + limit: { type: 'number' }, + results: { type: 'string' }, + 'out-dir': { type: 'string' }, + search: { type: 'string' }, + 'dry-run': { type: 'boolean', default: false }, + 'fail-fast': { type: 'boolean', default: false } + } +}).parse(); + +const scriptRoot = resolveToolRoot(); +const parityScript = path.join(scriptRoot, 'tests', 'parity.js'); +const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); +const resultsRoot = path.resolve( + argv.results || path.join(scriptRoot, 'benchmarks', 'results') +); +const runRoot = path.resolve( + argv['out-dir'] || path.join(resultsRoot, 'parity', timestamp) +); +const logRoot = path.join(runRoot, 'logs'); +const outRoot = path.join(runRoot, 'runs'); + +const DEFAULT_BACKENDS = ['sqlite', 'sqlite-fts']; +const DEFAULT_ANN_MODES = ['on', 'off']; +const DEFAULT_TOP = 10; + +const parseList = (value) => { + if (!value) return []; + return String(value) + .split(',') + .map((entry) => entry.trim()) + .filter(Boolean); +}; + +const normalizeBackend = (raw) => { + const value = String(raw || '').toLowerCase(); + if (value === 'fts') return 'sqlite-fts'; + return value; +}; + +const resolveBackends = () => { + const raw = argv.backends || argv.backend || ''; + const list = parseList(raw).map(normalizeBackend).filter(Boolean); + if (!list.length || list.includes('all')) return DEFAULT_BACKENDS.slice(); + return Array.from(new Set(list)); +}; + +const normalizeAnnMode = (raw) => { + const value = String(raw || '').toLowerCase(); + if (value === 'true' || value === '1' || value === 'on' || value === 'yes') { + return 'on'; + } + if (value === 'false' || value === '0' || value === 'off' || value === 'no') { + return 'off'; + } + return null; 
+}; + +const resolveAnnModes = () => { + const raw = parseList(argv['ann-modes']); + const modes = raw.map(normalizeAnnMode).filter(Boolean); + return modes.length ? Array.from(new Set(modes)) : DEFAULT_ANN_MODES.slice(); +}; + +const toSafeName = (value) => + String(value || '') + .replace(/[^a-z0-9-_]+/gi, '_') + .replace(/^_+|_+$/g, '') + .toLowerCase(); + +const appendArgs = (args, flag, value) => { + if (value === undefined || value === null || value === '') return; + args.push(flag, String(value)); +}; + +async function loadQueriesFromFile(filePath) { + const raw = await fsPromises.readFile(filePath, 'utf8'); + if (filePath.endsWith('.json')) { + const parsed = JSON.parse(raw); + if (Array.isArray(parsed)) { + return parsed + .map((entry) => { + if (typeof entry === 'string') return entry; + if (entry && typeof entry.query === 'string') return entry.query; + return null; + }) + .filter(Boolean); + } + if (Array.isArray(parsed.queries)) { + return parsed.queries + .map((entry) => { + if (typeof entry === 'string') return entry; + if (entry && typeof entry.query === 'string') return entry.query; + return null; + }) + .filter(Boolean); + } + return []; + } + return raw + .split(/\r?\n/) + .map((line) => line.trim()) + .filter((line) => line && !line.startsWith('#')); +} + +async function resolveQueryFile() { + if (argv.queries) { + const resolved = path.resolve(argv.queries); + if (!fs.existsSync(resolved)) { + throw new Error(`Query file not found: ${resolved}`); + } + const queries = await loadQueriesFromFile(resolved); + return { path: resolved, source: 'file', count: queries.length }; + } + + const queriesDir = path.resolve( + argv['queries-dir'] || path.join(scriptRoot, 'benchmarks', 'queries') + ); + const entries = await fsPromises.readdir(queriesDir, { withFileTypes: true }); + const files = entries + .filter((entry) => entry.isFile() && entry.name.endsWith('.txt')) + .map((entry) => entry.name) + .sort(); + if (!files.length) { + throw new Error(`No query files found in ${queriesDir}`); + } + + const seen = new Set(); + const combined = []; + for (const file of files) { + const filePath = path.join(queriesDir, file); + const lines = await loadQueriesFromFile(filePath); + for (const line of lines) { + if (seen.has(line)) continue; + seen.add(line); + combined.push(line); + } + } + + if (!combined.length) { + throw new Error(`No queries resolved from ${queriesDir}`); + } + + const outPath = path.join(runRoot, 'parity-queries.txt'); + const header = [ + '# Generated from benchmarks/queries/*.txt', + `# Source: ${queriesDir}`, + '' + ]; + await fsPromises.writeFile(outPath, `${header.join('\n')}${combined.join('\n')}\n`); + return { path: outPath, source: queriesDir, count: combined.length }; +} + +const configToArgs = (config, queryFile, outFile, top, limit) => { + const args = [parityScript]; + appendArgs(args, '--sqlite-backend', config.backend); + appendArgs(args, '--queries', queryFile); + appendArgs(args, '--top', top); + appendArgs(args, '--limit', limit); + appendArgs(args, '--search', argv.search); + args.push('--write-report'); + appendArgs(args, '--out', outFile); + if (config.annMode === 'on') args.push('--ann'); + if (config.annMode === 'off') args.push('--no-ann'); + return args; +}; + +async function main() { + await fsPromises.mkdir(logRoot, { recursive: true }); + await fsPromises.mkdir(outRoot, { recursive: true }); + + const queryInfo = await resolveQueryFile(); + const top = Number.isFinite(Number(argv.top)) + ? 
Math.max(1, Number(argv.top)) + : DEFAULT_TOP; + const limit = Number.isFinite(Number(argv.limit)) + ? Math.max(0, Number(argv.limit)) + : 0; + + const backends = resolveBackends(); + const annModes = resolveAnnModes(); + const configs = []; + for (const backend of backends) { + for (const annMode of annModes) { + const id = toSafeName([backend, annMode].join('-')); + configs.push({ id, backend, annMode }); + } + } + + if (!configs.length) { + throw new Error('No parity configurations resolved.'); + } + + const results = []; + for (const config of configs) { + const label = `${config.backend}/${config.annMode}`; + const outFile = path.join(outRoot, `${config.id}.json`); + const logFile = path.join(logRoot, `${config.id}.log`); + const args = configToArgs(config, queryInfo.path, outFile, top, limit); + + console.log(`\n[parity-matrix] ${label}`); + console.log( + `node ${args.map((arg) => (arg.includes(' ') ? `"${arg}"` : arg)).join(' ')}` + ); + + if (argv['dry-run']) { + results.push({ ...config, outFile, logFile, status: 'dry-run' }); + continue; + } + + try { + const child = await execa(process.execPath, args, { all: true }); + if (child.all) process.stdout.write(child.all); + await fsPromises.writeFile(logFile, child.all || ''); + + let summary = null; + try { + const report = JSON.parse(await fsPromises.readFile(outFile, 'utf8')); + summary = report.summary || null; + } catch { + summary = null; + } + + results.push({ ...config, outFile, logFile, status: 'ok', summary }); + } catch (err) { + const output = err?.all || err?.stdout || err?.stderr || String(err); + if (output) process.stdout.write(output); + await fsPromises.writeFile(logFile, output || ''); + results.push({ + ...config, + outFile, + logFile, + status: 'failed', + exitCode: err?.exitCode ?? null, + error: err?.message || String(err) + }); + if (argv['fail-fast']) break; + } + } + + const matrix = { + generatedAt: new Date().toISOString(), + runRoot, + outRoot, + logRoot, + queryFile: queryInfo.path, + querySource: queryInfo.source, + queryCount: queryInfo.count, + top, + limit, + results + }; + const matrixPath = path.join(runRoot, 'matrix.json'); + await fsPromises.writeFile(matrixPath, JSON.stringify(matrix, null, 2)); + console.log(`\n[parity-matrix] summary written to ${matrixPath}`); +} + +main().catch((err) => { + console.error(err?.message || String(err)); + process.exit(1); +}); diff --git a/tools/release-check.js b/tools/release-check.js new file mode 100644 index 000000000..d928d0baa --- /dev/null +++ b/tools/release-check.js @@ -0,0 +1,63 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import path from 'node:path'; + +const args = process.argv.slice(2); +const requireBreaking = args.includes('--breaking') + || process.env.PAIROFCLEATS_BREAKING === '1'; + +const root = process.cwd(); +const packagePath = path.join(root, 'package.json'); +const changelogPath = path.join(root, 'CHANGELOG.md'); + +if (!fs.existsSync(packagePath)) { + console.error('release-check: package.json not found.'); + process.exit(1); +} +if (!fs.existsSync(changelogPath)) { + console.error('release-check: CHANGELOG.md not found.'); + process.exit(1); +} + +const pkg = JSON.parse(fs.readFileSync(packagePath, 'utf8')); +const version = pkg?.version ? 
String(pkg.version).trim() : ''; +if (!version) { + console.error('release-check: package.json version missing.'); + process.exit(1); +} + +const changelog = fs.readFileSync(changelogPath, 'utf8'); +const headerRe = new RegExp(`^##\\s+v?${version.replace(/\./g, '\\.')}(\\b|\\s)`, 'm'); +const match = headerRe.exec(changelog); +if (!match) { + console.error(`release-check: CHANGELOG.md missing section for v${version}.`); + process.exit(1); +} + +const sectionStart = match.index; +const nextHeaderMatch = changelog.slice(sectionStart + match[0].length).match(/^##\s+/m); +const sectionEnd = nextHeaderMatch + ? sectionStart + match[0].length + nextHeaderMatch.index + : changelog.length; +const section = changelog.slice(sectionStart, sectionEnd); + +if (requireBreaking) { + const breakingHeader = section.match(/^###\s+Breaking\s*$/m); + if (!breakingHeader) { + console.error(`release-check: missing "### Breaking" section for v${version}.`); + process.exit(1); + } + const afterBreaking = section.slice(breakingHeader.index + breakingHeader[0].length); + const nextSubsection = afterBreaking.match(/^###\s+/m); + const breakingBlock = nextSubsection + ? afterBreaking.slice(0, nextSubsection.index) + : afterBreaking; + const bullets = breakingBlock.split('\n').map((line) => line.trim()).filter((line) => line.startsWith('-')); + const hasRealEntry = bullets.some((line) => !line.toLowerCase().includes('none')); + if (!bullets.length || !hasRealEntry) { + console.error(`release-check: add breaking change notes under v${version}.`); + process.exit(1); + } +} + +console.log(`release-check: changelog entry ok for v${version}.`); diff --git a/tools/repometrics-dashboard.js b/tools/repometrics-dashboard.js index 25fc6310a..5253b4fa6 100644 --- a/tools/repometrics-dashboard.js +++ b/tools/repometrics-dashboard.js @@ -2,14 +2,18 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; import { getMetricsDir, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['json'], - string: ['out', 'repo'], - default: { top: 5 } -}); +const argv = createCli({ + scriptName: 'repometrics-dashboard', + options: { + json: { type: 'boolean', default: false }, + out: { type: 'string' }, + repo: { type: 'string' }, + top: { type: 'number', default: 5 } + } +}).parse(); const rootArg = argv.repo ? 
path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); diff --git a/tools/report-artifacts.js b/tools/report-artifacts.js index 061aa31c4..b7c570d76 100644 --- a/tools/report-artifacts.js +++ b/tools/report-artifacts.js @@ -1,50 +1,91 @@ #!/usr/bin/env node import fs from 'node:fs'; -import fsPromises from 'node:fs/promises'; import path from 'node:path'; -import minimist from 'minimist'; -import { getCacheRoot, getDictConfig, getRepoCacheRoot, loadUserConfig, resolveRepoRoot, resolveSqlitePaths } from './dict-utils.js'; - -const argv = minimist(process.argv.slice(2), { - boolean: ['json', 'all'], - string: ['repo'], - default: { json: false, all: false } -}); +import { createCli } from '../src/shared/cli.js'; +import { getStatus } from '../src/integrations/core/status.js'; +import { validateIndexArtifacts } from '../src/index/validate.js'; +import { getMetricsDir, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'report-artifacts', + options: { + json: { type: 'boolean', default: false }, + all: { type: 'boolean', default: false }, + repo: { type: 'string' } + } +}).parse(); const rootArg = argv.repo ? path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); const userConfig = loadUserConfig(root); -const cacheRoot = (userConfig.cache && userConfig.cache.root) || process.env.PAIROFCLEATS_CACHE_ROOT || getCacheRoot(); -const repoCacheRoot = getRepoCacheRoot(root, userConfig); -const dictConfig = getDictConfig(root, userConfig); -const dictDir = dictConfig.dir; -const sqlitePaths = resolveSqlitePaths(root, userConfig); -const sqliteTargets = [ - { label: 'code', path: sqlitePaths.codePath }, - { label: 'prose', path: sqlitePaths.prosePath } -]; +const metricsDir = getMetricsDir(root, userConfig); +const status = await getStatus({ repoRoot: root, includeAll: argv.all }); -/** - * Recursively compute the size of a file or directory. - * @param {string} targetPath - * @returns {Promise} - */ -async function sizeOfPath(targetPath) { +const readJson = (targetPath) => { + if (!fs.existsSync(targetPath)) return null; try { - const stat = await fsPromises.lstat(targetPath); - if (stat.isSymbolicLink()) return 0; - if (stat.isFile()) return stat.size; - if (!stat.isDirectory()) return 0; - - const entries = await fsPromises.readdir(targetPath); - let total = 0; - for (const entry of entries) { - total += await sizeOfPath(path.join(targetPath, entry)); - } - return total; + return JSON.parse(fs.readFileSync(targetPath, 'utf8')); } catch { - return 0; + return null; } +}; + +const indexMetrics = { + code: readJson(path.join(metricsDir, 'index-code.json')), + prose: readJson(path.join(metricsDir, 'index-prose.json')) +}; +const lmdbMetrics = { + code: readJson(path.join(metricsDir, 'lmdb-code.json')), + prose: readJson(path.join(metricsDir, 'lmdb-prose.json')) +}; + +const computeRate = (count, ms) => { + const total = Number(count); + const elapsed = Number(ms); + if (!Number.isFinite(total) || !Number.isFinite(elapsed) || elapsed <= 0) return null; + return total / (elapsed / 1000); +}; + +const buildThroughput = (mode, metrics, bytes) => { + if (!metrics) return null; + const totalMs = Number(metrics?.timings?.totalMs); + const writeMs = Number(metrics?.timings?.writeMs); + const files = Number(metrics?.files?.candidates); + const chunks = Number(metrics?.chunks?.total); + const tokens = Number(metrics?.tokens?.total); + const payload = { + mode, + totalMs: Number.isFinite(totalMs) ? 
totalMs : null, + writeMs: Number.isFinite(writeMs) ? writeMs : null, + files: Number.isFinite(files) ? files : null, + chunks: Number.isFinite(chunks) ? chunks : null, + tokens: Number.isFinite(tokens) ? tokens : null, + bytes: Number.isFinite(Number(bytes)) ? Number(bytes) : null + }; + payload.filesPerSec = computeRate(payload.files, payload.totalMs); + payload.chunksPerSec = computeRate(payload.chunks, payload.totalMs); + payload.tokensPerSec = computeRate(payload.tokens, payload.totalMs); + payload.bytesPerSec = computeRate(payload.bytes, payload.totalMs); + payload.writeBytesPerSec = computeRate(payload.bytes, payload.writeMs); + return payload; +}; + +const throughput = { + code: buildThroughput('code', indexMetrics.code, status.repo?.artifacts?.indexCode), + prose: buildThroughput('prose', indexMetrics.prose, status.repo?.artifacts?.indexProse), + lmdb: { + code: buildThroughput('lmdb code', lmdbMetrics.code, status.repo?.lmdb?.code?.bytes), + prose: buildThroughput('lmdb prose', lmdbMetrics.prose, status.repo?.lmdb?.prose?.bytes) + } +}; + +const corruption = await validateIndexArtifacts({ root, userConfig, modes: ['code', 'prose'] }); +status.throughput = throughput; +status.corruption = corruption; + +if (argv.json) { + console.log(JSON.stringify(status, null, 2)); + process.exit(0); } /** @@ -65,169 +106,84 @@ function formatBytes(bytes) { return `${rounded} ${units[unit]}`; } -/** - * Check if a path is contained within another path. - * @param {string} parent - * @param {string} child - * @returns {boolean} - */ -function isInside(parent, child) { - const rel = path.relative(parent, child); - return rel === '' || (!rel.startsWith('..') && !path.isAbsolute(rel)); -} - -const repoArtifacts = { - indexCode: path.join(repoCacheRoot, 'index-code'), - indexProse: path.join(repoCacheRoot, 'index-prose'), - repometrics: path.join(repoCacheRoot, 'repometrics'), - incremental: path.join(repoCacheRoot, 'incremental') -}; - -const repoCacheSize = await sizeOfPath(repoCacheRoot); -const repoArtifactSizes = {}; -for (const [name, artifactPath] of Object.entries(repoArtifacts)) { - repoArtifactSizes[name] = await sizeOfPath(artifactPath); -} +const repo = status.repo; +const overall = status.overall; +const code = repo.sqlite?.code; +const prose = repo.sqlite?.prose; +const lmdbCode = repo.lmdb?.code; +const lmdbProse = repo.lmdb?.prose; -const sqliteStats = {}; -let sqliteOutsideCacheSize = 0; -for (const target of sqliteTargets) { - const exists = fs.existsSync(target.path); - const size = exists ? await sizeOfPath(target.path) : 0; - sqliteStats[target.label] = exists ? 
{ path: target.path, bytes: size } : null; - if (exists && !isInside(path.resolve(cacheRoot), target.path)) { - sqliteOutsideCacheSize += size; - } -} -const cacheRootSize = await sizeOfPath(cacheRoot); -const dictSize = await sizeOfPath(dictDir); -const overallSize = cacheRootSize + sqliteOutsideCacheSize; - -const health = { issues: [], hints: [] }; -const indexIssues = []; -if (!fs.existsSync(repoArtifacts.indexCode)) { - indexIssues.push('index-code directory missing'); -} else { - if (!fs.existsSync(path.join(repoArtifacts.indexCode, 'chunk_meta.json'))) { - indexIssues.push('index-code chunk_meta.json missing'); - } - if (!fs.existsSync(path.join(repoArtifacts.indexCode, 'token_postings.json'))) { - indexIssues.push('index-code token_postings.json missing'); - } -} -if (!fs.existsSync(repoArtifacts.indexProse)) { - indexIssues.push('index-prose directory missing'); -} else { - if (!fs.existsSync(path.join(repoArtifacts.indexProse, 'chunk_meta.json'))) { - indexIssues.push('index-prose chunk_meta.json missing'); - } - if (!fs.existsSync(path.join(repoArtifacts.indexProse, 'token_postings.json'))) { - indexIssues.push('index-prose token_postings.json missing'); - } -} -if (indexIssues.length) { - health.issues.push(...indexIssues); - health.hints.push('Run `npm run build-index` to rebuild file-backed indexes.'); +console.log('Repo artifacts'); +console.log(`- cache root: ${formatBytes(repo.totalBytes)} (${repo.root})`); +console.log(`- index-code: ${formatBytes(repo.artifacts.indexCode)} (${path.join(repo.root, 'index-code')})`); +console.log(`- index-prose: ${formatBytes(repo.artifacts.indexProse)} (${path.join(repo.root, 'index-prose')})`); +console.log(`- repometrics: ${formatBytes(repo.artifacts.repometrics)} (${path.join(repo.root, 'repometrics')})`); +console.log(`- incremental: ${formatBytes(repo.artifacts.incremental)} (${path.join(repo.root, 'incremental')})`); +console.log(`- sqlite code db: ${code ? formatBytes(code.bytes) : 'missing'} (${code?.path || status.repo.sqlite?.code?.path || 'missing'})`); +console.log(`- sqlite prose db: ${prose ? formatBytes(prose.bytes) : 'missing'} (${prose?.path || status.repo.sqlite?.prose?.path || 'missing'})`); +console.log(`- lmdb code db: ${lmdbCode ? formatBytes(lmdbCode.bytes) : 'missing'} (${lmdbCode?.path || status.repo.lmdb?.code?.path || 'missing'})`); +console.log(`- lmdb prose db: ${lmdbProse ? 
formatBytes(lmdbProse.bytes) : 'missing'} (${lmdbProse?.path || status.repo.lmdb?.prose?.path || 'missing'})`); +if (repo.sqlite?.legacy) { + console.log(`- legacy sqlite db: ${repo.sqlite.legacy.path}`); } -const sqliteIssues = []; -if (userConfig.sqlite?.use === true) { - if (!fs.existsSync(sqlitePaths.codePath)) sqliteIssues.push('sqlite code db missing'); - if (!fs.existsSync(sqlitePaths.prosePath)) sqliteIssues.push('sqlite prose db missing'); +console.log('\nOverall'); +console.log(`- cache root: ${formatBytes(overall.cacheBytes)} (${overall.cacheRoot})`); +console.log(`- dictionaries: ${formatBytes(overall.dictionaryBytes)}`); +if (overall.sqliteOutsideCacheBytes) { + console.log(`- sqlite outside cache: ${formatBytes(overall.sqliteOutsideCacheBytes)}`); } -if (sqliteIssues.length) { - health.issues.push(...sqliteIssues); - health.hints.push('Run `npm run build-sqlite-index` to rebuild SQLite indexes.'); +if (overall.lmdbOutsideCacheBytes) { + console.log(`- lmdb outside cache: ${formatBytes(overall.lmdbOutsideCacheBytes)}`); } +console.log(`- total: ${formatBytes(overall.totalBytes)}`); -const repoRollups = []; -if (argv.all) { - const reposRoot = path.join(cacheRoot, 'repos'); - if (fs.existsSync(reposRoot)) { - const entries = await fsPromises.readdir(reposRoot, { withFileTypes: true }); - for (const entry of entries) { - if (!entry.isDirectory()) continue; - const repoPath = path.join(reposRoot, entry.name); - const bytes = await sizeOfPath(repoPath); - const stat = await fsPromises.stat(repoPath); - repoRollups.push({ - id: entry.name, - path: path.resolve(repoPath), - bytes, - mtime: stat.mtime ? stat.mtime.toISOString() : null - }); - } +if (status.health?.issues?.length) { + console.log('\nHealth'); + status.health.issues.forEach((issue) => console.log(`- issue: ${issue}`)); + status.health.hints.forEach((hint) => console.log(`- hint: ${hint}`)); +} + +if (status.throughput) { + const formatRate = (value, unit) => (Number.isFinite(value) ? `${value.toFixed(1)} ${unit}/s` : 'n/a'); + const formatMs = (value) => (Number.isFinite(value) ? `${value.toFixed(0)} ms` : 'n/a'); + console.log('\nThroughput'); + const entries = [ + ['code', status.throughput.code], + ['prose', status.throughput.prose], + ['lmdb code', status.throughput.lmdb?.code], + ['lmdb prose', status.throughput.lmdb?.prose] + ]; + for (const [mode, entry] of entries) { + if (!entry) continue; + console.log( + `- ${mode}: files ${formatRate(entry.filesPerSec, 'files')}, ` + + `chunks ${formatRate(entry.chunksPerSec, 'chunks')}, ` + + `tokens ${formatRate(entry.tokensPerSec, 'tokens')}, ` + + `bytes ${formatRate(entry.bytesPerSec, 'bytes')} (total ${formatMs(entry.totalMs)})` + ); } } -if (argv.json) { - const sqlitePayload = { - code: sqliteStats.code, - prose: sqliteStats.prose, - legacy: sqlitePaths.legacyExists ? 
{ path: sqlitePaths.legacyPath } : null - }; - const payload = { - repo: { - root: path.resolve(repoCacheRoot), - totalBytes: repoCacheSize, - artifacts: repoArtifactSizes, - sqlite: sqlitePayload - }, - health, - overall: { - cacheRoot: path.resolve(cacheRoot), - cacheBytes: cacheRootSize, - dictionaryBytes: dictSize, - sqliteOutsideCacheBytes: sqliteOutsideCacheSize, - totalBytes: overallSize - } - }; - if (argv.all) { - const totalRepoBytes = repoRollups.reduce((sum, repo) => sum + repo.bytes, 0); - payload.allRepos = { - root: path.resolve(path.join(cacheRoot, 'repos')), - repos: repoRollups, - totalBytes: totalRepoBytes - }; +if (status.corruption) { + const validation = status.corruption; + const statusLabel = validation.ok ? 'ok' : 'issues'; + console.log('\nIntegrity'); + console.log(`- index-validate: ${statusLabel}`); + if (!validation.ok && validation.issues?.length) { + validation.issues.forEach((issue) => console.log(`- issue: ${issue}`)); + } + if (validation.warnings?.length) { + validation.warnings.forEach((warning) => console.log(`- warning: ${warning}`)); } - console.log(JSON.stringify(payload, null, 2)); - process.exit(0); -} - -console.log('Repo artifacts'); -console.log(`- cache root: ${formatBytes(repoCacheSize)} (${path.resolve(repoCacheRoot)})`); -console.log(`- index-code: ${formatBytes(repoArtifactSizes.indexCode)} (${path.resolve(repoArtifacts.indexCode)})`); -console.log(`- index-prose: ${formatBytes(repoArtifactSizes.indexProse)} (${path.resolve(repoArtifacts.indexProse)})`); -console.log(`- repometrics: ${formatBytes(repoArtifactSizes.repometrics)} (${path.resolve(repoArtifacts.repometrics)})`); -console.log(`- incremental: ${formatBytes(repoArtifactSizes.incremental)} (${path.resolve(repoArtifacts.incremental)})`); -const code = sqliteStats.code; -const prose = sqliteStats.prose; -console.log(`- sqlite code db: ${code ? formatBytes(code.bytes) : 'missing'} (${code?.path || sqlitePaths.codePath})`); -console.log(`- sqlite prose db: ${prose ? 
formatBytes(prose.bytes) : 'missing'} (${prose?.path || sqlitePaths.prosePath})`); -if (sqlitePaths.legacyExists) { - console.log(`- legacy sqlite db: ${sqlitePaths.legacyPath}`); -} - -console.log('\nOverall'); -console.log(`- cache root: ${formatBytes(cacheRootSize)} (${path.resolve(cacheRoot)})`); -console.log(`- dictionaries: ${formatBytes(dictSize)} (${path.resolve(dictDir)})`); -if (sqliteOutsideCacheSize) { - console.log(`- sqlite outside cache: ${formatBytes(sqliteOutsideCacheSize)}`); -} -console.log(`- total: ${formatBytes(overallSize)}`); - -if (health.issues.length) { - console.log('\nHealth'); - health.issues.forEach((issue) => console.log(`- issue: ${issue}`)); - health.hints.forEach((hint) => console.log(`- hint: ${hint}`)); } -if (argv.all) { - const totalRepoBytes = repoRollups.reduce((sum, repo) => sum + repo.bytes, 0); +if (status.allRepos) { + const repos = status.allRepos.repos.slice().sort((a, b) => b.bytes - a.bytes); console.log('\nAll repos'); - console.log(`- root: ${path.resolve(path.join(cacheRoot, 'repos'))}`); - console.log(`- total: ${formatBytes(totalRepoBytes)}`); - for (const repo of repoRollups.sort((a, b) => b.bytes - a.bytes)) { - console.log(`- ${repo.id}: ${formatBytes(repo.bytes)} (${repo.path})`); + console.log(`- root: ${status.allRepos.root}`); + console.log(`- total: ${formatBytes(status.allRepos.totalBytes)}`); + for (const repoEntry of repos) { + console.log(`- ${repoEntry.id}: ${formatBytes(repoEntry.bytes)} (${repoEntry.path})`); } } diff --git a/tools/report-code-map.js b/tools/report-code-map.js new file mode 100644 index 000000000..1b03235e3 --- /dev/null +++ b/tools/report-code-map.js @@ -0,0 +1,244 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { pathToFileURL } from 'node:url'; +import { createCli } from '../src/shared/cli.js'; +import { buildCodeMap, buildNodeList, buildMapCacheKey } from '../src/map/build-map.js'; +import { renderDot } from '../src/map/dot-writer.js'; +import { renderSvgHtml } from '../src/map/html-writer.js'; +import { renderIsometricHtml } from '../src/map/isometric-viewer.js'; +import { loadUserConfig, resolveRepoRoot, getIndexDir, getCurrentBuildInfo, getRepoId } from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'report map', + options: { + repo: { type: 'string', describe: 'Repo root.' }, + mode: { type: 'string', default: 'code' }, + 'index-root': { type: 'string' }, + scope: { type: 'string', default: 'repo' }, + focus: { type: 'string' }, + include: { type: 'string' }, + 'only-exported': { type: 'boolean', default: false }, + collapse: { type: 'string', default: 'none' }, + 'max-files': { type: 'number' }, + 'max-members-per-file': { type: 'number' }, + 'max-edges': { type: 'number' }, + 'top-k-by-degree': { type: 'boolean', default: false }, + format: { type: 'string', default: 'json' }, + out: { type: 'string' }, + 'model-out': { type: 'string' }, + 'node-list-out': { type: 'string' }, + json: { type: 'boolean', default: false }, + pretty: { type: 'boolean', default: false }, + 'open-uri-template': { type: 'string' }, + 'three-url': { type: 'string' }, + 'wasd-sensitivity': { type: 'number' }, + 'wasd-acceleration': { type: 'number' }, + 'wasd-max-speed': { type: 'number' }, + 'wasd-drag': { type: 'number' }, + 'zoom-sensitivity': { type: 'number' }, + 'cache-dir': { type: 'string' }, + refresh: { type: 'boolean', default: false } + } +}).parse(); + +const rootArg = argv.repo ? 
path.resolve(argv.repo) : null;
+const repoRoot = rootArg || resolveRepoRoot(process.cwd());
+const userConfig = loadUserConfig(repoRoot);
+const mode = String(argv.mode || 'code').toLowerCase();
+const indexDir = getIndexDir(repoRoot, mode, userConfig, {
+  indexRoot: argv['index-root'] ? path.resolve(argv['index-root']) : null
+});
+
+const scope = String(argv.scope || 'repo').toLowerCase();
+const focus = argv.focus ? String(argv.focus) : '';
+const formatRaw = String(argv.format || 'json').toLowerCase();
+const format = formatRaw === 'iso' ? 'html-iso' : formatRaw;
+
+const viewerControls = {
+  wasd: {
+    ...(Number.isFinite(argv['wasd-sensitivity']) ? { sensitivity: Number(argv['wasd-sensitivity']) } : {}),
+    ...(Number.isFinite(argv['wasd-acceleration']) ? { acceleration: Number(argv['wasd-acceleration']) } : {}),
+    ...(Number.isFinite(argv['wasd-max-speed']) ? { maxSpeed: Number(argv['wasd-max-speed']) } : {}),
+    ...(Number.isFinite(argv['wasd-drag']) ? { drag: Number(argv['wasd-drag']) } : {})
+  },
+  ...(Number.isFinite(argv['zoom-sensitivity']) ? { zoomSensitivity: Number(argv['zoom-sensitivity']) } : {})
+};
+
+const buildOptions = {
+  mode,
+  scope,
+  focus,
+  include: argv.include,
+  onlyExported: argv['only-exported'] === true,
+  collapse: argv.collapse,
+  maxFiles: argv['max-files'],
+  maxMembersPerFile: argv['max-members-per-file'],
+  maxEdges: argv['max-edges'],
+  topKByDegree: argv['top-k-by-degree'] === true,
+  viewer: {
+    controls: viewerControls,
+    openUriTemplate: argv['open-uri-template'] || null
+  }
+};
+
+const buildInfo = getCurrentBuildInfo(repoRoot, userConfig, { mode });
+const cacheKey = buildMapCacheKey({ buildId: buildInfo?.buildId || null, options: buildOptions });
+const cacheDir = argv['cache-dir']
+  ? path.resolve(argv['cache-dir'])
+  : path.join(repoRoot, '.pairofcleats', 'maps', 'cache');
+const cachePath = path.join(cacheDir, `${cacheKey}.json`);
+
+const ensureDir = (targetPath) => {
+  if (!targetPath) return;
+  const dir = path.dirname(targetPath);
+  fs.mkdirSync(dir, { recursive: true });
+};
+
+let mapModel = null;
+const warnings = [];
+
+if (!argv.refresh && fs.existsSync(cachePath)) {
+  try {
+    mapModel = JSON.parse(fs.readFileSync(cachePath, 'utf8'));
+  } catch (err) {
+    warnings.push(`cache read failed: ${err?.message || err}`);
+  }
+}
+
+if (!mapModel) {
+  mapModel = buildCodeMap({ repoRoot, indexDir, options: buildOptions });
+  if (mapModel.root) mapModel.root.id = getRepoId(repoRoot); // root is re-normalized below
+  try {
+    ensureDir(cachePath);
+    fs.writeFileSync(cachePath, JSON.stringify(mapModel, null, 2));
+  } catch (err) {
+    warnings.push(`cache write failed: ${err?.message || err}`);
+  }
+}
+
+if (mapModel) {
+  mapModel.root = mapModel.root || { path: repoRoot, id: null };
+  mapModel.root.path = repoRoot;
+  mapModel.root.id = mapModel.root.id || getRepoId(repoRoot);
+  warnings.push(...(mapModel.warnings || []));
+}
+
+const modelOut = argv['model-out'] ? path.resolve(argv['model-out']) : null;
+if (modelOut) {
+  try {
+    ensureDir(modelOut);
+    fs.writeFileSync(modelOut, JSON.stringify(mapModel, null, 2));
+  } catch (err) {
+    warnings.push(`model output failed: ${err?.message || err}`);
+  }
+}
+
+const nodeListOut = argv['node-list-out'] ?
path.resolve(argv['node-list-out']) : null; +if (nodeListOut) { + try { + ensureDir(nodeListOut); + const list = buildNodeList(mapModel); + fs.writeFileSync(nodeListOut, JSON.stringify(list, null, 2)); + } catch (err) { + warnings.push(`node list output failed: ${err?.message || err}`); + } +} + +const resolveThreeUrl = (targetPath) => { + if (argv['three-url']) return argv['three-url']; + const modulePath = path.join(repoRoot, 'node_modules', 'three', 'build', 'three.module.js'); + if (!fs.existsSync(modulePath)) return ''; + if (targetPath) { + const rel = path.relative(path.dirname(targetPath), modulePath).replace(/\\/g, '/'); + return rel.startsWith('.') ? rel : `./${rel}`; + } + return pathToFileURL(modulePath).href; +}; + +const formatOutputPath = (targetPath, fallbackExt) => { + if (!targetPath) return null; + if (!fallbackExt) return targetPath; + const currentExt = path.extname(targetPath); + if (currentExt.toLowerCase() === fallbackExt) return targetPath; + return `${targetPath.slice(0, targetPath.length - currentExt.length)}${fallbackExt}`; +}; + +const renderSvg = (dot) => { + const result = spawnSync('dot', ['-Tsvg'], { + input: dot, + encoding: 'utf8' + }); + if (result.status !== 0) { + const message = result.stderr || result.stdout || 'Graphviz dot failed.'; + warnings.push(message.trim()); + return null; + } + return result.stdout; +}; + +let output = null; +let outputPath = argv.out ? path.resolve(argv.out) : null; +let resolvedFormat = format; + +if (format === 'json') { + output = JSON.stringify(mapModel, null, argv.pretty ? 2 : 0); +} else if (format === 'dot') { + output = renderDot(mapModel); +} else if (format === 'svg' || format === 'html') { + const dot = renderDot(mapModel); + const svg = renderSvg(dot); + if (!svg) { + resolvedFormat = 'dot'; + output = dot; + outputPath = formatOutputPath(outputPath, '.dot'); + } else if (format === 'svg') { + output = svg; + } else { + output = renderSvgHtml({ svg, mapModel, title: 'Code Map' }); + } +} else if (format === 'html-iso') { + const threeUrl = resolveThreeUrl(outputPath); + if (!threeUrl) warnings.push('three.js module missing; install three or set --three-url'); + output = renderIsometricHtml({ + mapModel, + threeUrl, + openUriTemplate: argv['open-uri-template'] || mapModel.viewer?.openUriTemplate, + viewerConfig: mapModel.viewer || {} + }); +} else { + output = JSON.stringify(mapModel, null, argv.pretty ? 2 : 0); + resolvedFormat = 'json'; +} + +if (outputPath) { + try { + ensureDir(outputPath); + fs.writeFileSync(outputPath, output); + } catch (err) { + warnings.push(`output write failed: ${err?.message || err}`); + } +} + +const report = { + ok: true, + format: resolvedFormat, + outPath: outputPath, + modelPath: modelOut || null, + nodeListPath: nodeListOut || null, + cacheKey, + summary: mapModel.summary || null, + warnings: Array.from(new Set(warnings.filter(Boolean))) +}; + +if (argv.json) { + console.log(JSON.stringify(report, null, argv.pretty ? 
2 : 0)); + process.exit(0); +} + +if (!outputPath) { + process.stdout.write(output); +} else if (!argv.json) { + console.log(`Wrote ${resolvedFormat} map to ${outputPath}`); +} diff --git a/tools/reset-config.js b/tools/reset-config.js new file mode 100644 index 000000000..a08a0c1f5 --- /dev/null +++ b/tools/reset-config.js @@ -0,0 +1,68 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import path from 'node:path'; +import { createCli } from '../src/shared/cli.js'; +import { resolveRepoRoot } from './dict-utils.js'; +import { DEFAULT_USER_CONFIG_TEMPLATE } from './default-config-template.js'; + +const argv = createCli({ + scriptName: 'reset-config', + options: { + repo: { type: 'string' }, + config: { type: 'string' }, + force: { type: 'boolean', default: false }, + backup: { type: 'boolean', default: true }, + json: { type: 'boolean', default: false } + } +}).parse(); + +const isTruthy = (value) => { + if (value == null) return false; + const normalized = String(value).trim().toLowerCase(); + return ['1', 'true', 'yes', 'on'].includes(normalized); +}; + +const forceRequested = argv.force + || isTruthy(process.env.PAIROFCLEATS_RESET_FORCE) + || isTruthy(process.env.npm_config_force); + +const repoRoot = argv.repo ? path.resolve(argv.repo) : resolveRepoRoot(process.cwd()); +const configPath = argv.config + ? path.resolve(argv.config) + : path.join(repoRoot, '.pairofcleats.json'); +const existing = fs.existsSync(configPath); +const result = { + ok: true, + configPath, + backupPath: null, + reset: false +}; + +if (existing && !forceRequested) { + result.ok = false; + if (argv.json) { + console.log(JSON.stringify(result, null, 2)); + } else { + console.error(`[reset-config] Refusing to overwrite ${configPath} without --force.`); + } + process.exit(1); +} + +if (existing && argv.backup) { + const backupPath = `${configPath}.bak`; + fs.copyFileSync(configPath, backupPath); + result.backupPath = backupPath; +} + +const template = DEFAULT_USER_CONFIG_TEMPLATE.trimEnd(); +fs.writeFileSync(configPath, `${template}\n`, 'utf8'); +result.reset = true; + +if (argv.json) { + console.log(JSON.stringify(result, null, 2)); +} else { + console.log(`[reset-config] Wrote default config to ${configPath}`); + if (result.backupPath) { + console.log(`[reset-config] Backup saved to ${result.backupPath}`); + } +} diff --git a/tools/run-phase22-gates.js b/tools/run-phase22-gates.js new file mode 100644 index 000000000..848980df3 --- /dev/null +++ b/tools/run-phase22-gates.js @@ -0,0 +1,20 @@ +#!/usr/bin/env node +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const root = process.cwd(); +const tests = [ + { label: 'type-inference-lsp-enrichment', file: path.join(root, 'tests', 'type-inference-lsp-enrichment.js') }, + { label: 'embeddings-dims-mismatch', file: path.join(root, 'tests', 'embeddings-dims-mismatch.js') }, + { label: 'embeddings-cache-identity', file: path.join(root, 'tests', 'embeddings-cache-identity.js') } +]; + +for (const test of tests) { + const result = spawnSync(process.execPath, [test.file], { stdio: 'inherit' }); + if (result.status !== 0) { + console.error(`phase22 gate failed: ${test.label}`); + process.exit(result.status ?? 
1);
+  }
+}
+
+console.log('phase22 gate tests passed');
diff --git a/tools/scip-ingest.js b/tools/scip-ingest.js
new file mode 100644
index 000000000..8e246e552
--- /dev/null
+++ b/tools/scip-ingest.js
@@ -0,0 +1,242 @@
+#!/usr/bin/env node
+import fs from 'node:fs';
+import fsPromises from 'node:fs/promises';
+import path from 'node:path';
+import readline from 'node:readline';
+import { spawn } from 'node:child_process';
+import { createCli } from '../src/shared/cli.js';
+import { getRepoCacheRoot, loadUserConfig, resolveRepoRoot } from './dict-utils.js';
+
+const argv = createCli({
+  scriptName: 'scip-ingest',
+  options: {
+    repo: { type: 'string' },
+    input: { type: 'string' },
+    out: { type: 'string' },
+    json: { type: 'boolean', default: false },
+    run: { type: 'boolean', default: false },
+    scip: { type: 'string', default: 'scip' },
+    args: { type: 'string' }
+  }
+}).parse();
+
+const repoRoot = argv.repo ? path.resolve(argv.repo) : resolveRepoRoot(process.cwd());
+const userConfig = loadUserConfig(repoRoot);
+const cacheRoot = getRepoCacheRoot(repoRoot, userConfig);
+const outputPath = argv.out
+  ? path.resolve(argv.out)
+  : path.join(cacheRoot, 'scip', 'scip.jsonl');
+const metaPath = `${outputPath}.meta.json`;
+const inputPath = argv.input ? String(argv.input) : null;
+const runScip = argv.run === true;
+const scipCmd = argv.scip || 'scip';
+
+const toPosix = (value) => value.replace(/\\/g, '/');
+const normalizePath = (value) => {
+  if (!value) return null;
+  const raw = String(value);
+  const resolved = path.isAbsolute(raw) ? raw : path.resolve(repoRoot, raw);
+  const rel = path.relative(repoRoot, resolved);
+  return toPosix(rel || raw);
+};
+
+const stats = {
+  documents: 0,
+  occurrences: 0,
+  definitions: 0,
+  references: 0,
+  errors: 0,
+  kinds: {},
+  languages: {}
+};
+
+const bump = (bucket, key) => {
+  if (!key) return;
+  const k = String(key);
+  bucket[k] = (bucket[k] || 0) + 1;
+};
+
+const ensureOutputDir = async () => {
+  await fsPromises.mkdir(path.dirname(outputPath), { recursive: true });
+};
+
+// Ensure the destination directory exists before the stream is created:
+// createWriteStream opens its fd asynchronously and would race ensureOutputDir().
+fs.mkdirSync(path.dirname(outputPath), { recursive: true });
+const writeStream = fs.createWriteStream(outputPath, { encoding: 'utf8' });
+
+const roleInfo = (roles) => {
+  const value = Number(roles) || 0;
+  const isDefinition = (value & 1) === 1;
+  const isReference = (value & 2) === 2;
+  return { isDefinition, isReference };
+};
+
+const normalizeRange = (range) => {
+  if (!Array.isArray(range) || !range.length) return null;
+  const startLine = Number.isFinite(Number(range[0])) ? Number(range[0]) : 0;
+  const startChar = Number.isFinite(Number(range[1])) ? Number(range[1]) : 0;
+  let endLine = startLine;
+  let endChar = startChar;
+  if (range.length === 3) {
+    endChar = Number.isFinite(Number(range[2])) ? Number(range[2]) : endChar;
+  } else if (range.length >= 4) {
+    endLine = Number.isFinite(Number(range[2])) ? Number(range[2]) : endLine;
+    endChar = Number.isFinite(Number(range[3])) ?
Number(range[3]) : endChar; + } + return { + startLine: startLine + 1, + startChar, + endLine: endLine + 1, + endChar + }; +}; + +const extractSymbolInfo = (doc) => { + const entries = doc?.symbols || doc?.symbolInformation || doc?.symbolInformations || []; + if (!Array.isArray(entries) || !entries.length) return new Map(); + const map = new Map(); + for (const entry of entries) { + if (!entry || !entry.symbol) continue; + map.set(entry.symbol, entry); + } + return map; +}; + +const writeOccurrence = (doc, occurrence, symbolInfo) => { + if (!occurrence || !occurrence.symbol) return; + const file = normalizePath(doc.relativePath || doc.path || doc.file || ''); + if (!file) return; + const range = normalizeRange(occurrence.range || occurrence.enclosingRange); + const info = symbolInfo.get(occurrence.symbol) || {}; + const role = roleInfo(occurrence.symbolRoles); + const entry = { + file, + ext: path.extname(file).toLowerCase(), + name: info.displayName || info.symbol || occurrence.symbol, + symbol: occurrence.symbol, + kind: info.kind || info.symbolKind || null, + signature: info.signature || info.signatureDocumentation || null, + startLine: range ? range.startLine : null, + endLine: range ? range.endLine : null, + startChar: range ? range.startChar : null, + endChar: range ? range.endChar : null, + role: role.isDefinition ? 'definition' : (role.isReference ? 'reference' : 'other'), + language: info.language || doc.language || null, + scope: info.scope || null, + scopeKind: info.scopeKind || null + }; + stats.occurrences += 1; + if (role.isDefinition) stats.definitions += 1; + if (role.isReference) stats.references += 1; + bump(stats.kinds, entry.kind || 'unknown'); + bump(stats.languages, entry.language || 'unknown'); + writeStream.write(`${JSON.stringify(entry)}\n`); +}; + +const handleDocument = (doc) => { + if (!doc || typeof doc !== 'object') return; + const file = doc.relativePath || doc.path || doc.file || null; + if (!file) return; + stats.documents += 1; + const symbolInfo = extractSymbolInfo(doc); + const occurrences = Array.isArray(doc.occurrences) ? 
doc.occurrences : []; + for (const occ of occurrences) { + writeOccurrence(doc, occ, symbolInfo); + } +}; + +const handlePayload = (payload) => { + if (!payload) return; + if (Array.isArray(payload)) { + payload.forEach(handlePayload); + return; + } + if (Array.isArray(payload.documents)) { + payload.documents.forEach(handleDocument); + return; + } + if (payload.relativePath || payload.path || payload.file) { + handleDocument(payload); + } +}; + +const ingestJsonLines = async (stream) => { + const rl = readline.createInterface({ input: stream, crlfDelay: Infinity }); + for await (const line of rl) { + const trimmed = line.trim(); + if (!trimmed) continue; + let parsed = null; + try { + parsed = JSON.parse(trimmed); + } catch { + stats.errors += 1; + continue; + } + handlePayload(parsed); + } +}; + +const ingestJsonFile = async (filePath) => { + try { + const raw = await fsPromises.readFile(filePath, 'utf8'); + const parsed = JSON.parse(raw); + handlePayload(parsed); + return true; + } catch { + return false; + } +}; + +const runScipCommand = async () => { + const args = ['print', '--format=json']; + if (inputPath) args.push('--input', inputPath); + if (argv.args) { + const extra = String(argv.args) + .split(/\s+/) + .map((entry) => entry.trim()) + .filter(Boolean); + args.push(...extra); + } + const child = spawn(scipCmd, args, { stdio: ['ignore', 'pipe', 'pipe'] }); + child.stderr.on('data', (chunk) => process.stderr.write(chunk)); + await ingestJsonLines(child.stdout); + const exitCode = await new Promise((resolve) => { + child.on('close', (code) => resolve(code ?? 0)); + }); + if (exitCode !== 0) { + throw new Error(`scip exited with code ${exitCode}`); + } +}; + +await ensureOutputDir(); +if (runScip) { + await runScipCommand(); +} else if (inputPath && inputPath !== '-') { + const parsed = await ingestJsonFile(inputPath); + if (!parsed) { + const inputStream = fs.createReadStream(inputPath, { encoding: 'utf8' }); + await ingestJsonLines(inputStream); + } +} else { + await ingestJsonLines(process.stdin); +} + +writeStream.end(); + +const summary = { + generatedAt: new Date().toISOString(), + repoRoot: path.resolve(repoRoot), + input: inputPath || (runScip ? 'scip' : 'stdin'), + output: path.resolve(outputPath), + stats +}; +await fsPromises.writeFile(metaPath, JSON.stringify(summary, null, 2)); + +if (argv.json) { + console.log(JSON.stringify(summary, null, 2)); +} else { + console.log(`SCIP ingest: ${stats.occurrences} occurrences (${stats.errors} parse errors)`); + console.log(`- output: ${outputPath}`); + console.log(`- meta: ${metaPath}`); +} diff --git a/tools/search-sqlite.js b/tools/search-sqlite.js deleted file mode 100644 index 645b7fa34..000000000 --- a/tools/search-sqlite.js +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env node -import { spawnSync } from 'node:child_process'; -import path from 'node:path'; -import { fileURLToPath } from 'node:url'; - -const args = process.argv.slice(2); -const hasBackend = args.includes('--backend'); -const scriptRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..'); -const searchPath = path.join(scriptRoot, 'search.js'); -const forwarded = hasBackend ? args : ['--backend', 'sqlite-fts', ...args]; - -const result = spawnSync(process.execPath, [searchPath, ...forwarded], { - stdio: 'inherit', - env: process.env -}); - -process.exit(result.status ?? 
1); diff --git a/tools/service/config.js b/tools/service/config.js new file mode 100644 index 000000000..8bb698583 --- /dev/null +++ b/tools/service/config.js @@ -0,0 +1,48 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { getCacheRoot } from '../dict-utils.js'; + +export function getServiceConfigPath(inputPath = null) { + if (inputPath) return path.resolve(inputPath); + return path.join(getCacheRoot(), 'service', 'config.json'); +} + +export function loadServiceConfig(configPath) { + if (!configPath || !fs.existsSync(configPath)) { + return { + repos: [], + queue: { + maxQueued: 20 + }, + worker: { + concurrency: 1 + }, + embeddings: { + queue: { + maxQueued: 10 + }, + worker: { + concurrency: 1, + maxMemoryMb: 4096 + } + }, + sync: { + policy: 'pull', + intervalMs: 5 * 60 * 1000 + } + }; + } + const raw = JSON.parse(fs.readFileSync(configPath, 'utf8')); + return raw && typeof raw === 'object' ? raw : {}; +} + +export function resolveRepoRegistry(config, configPath) { + if (Array.isArray(config?.repos)) return config.repos; + const repoFile = config?.reposFile; + if (!repoFile) return []; + const baseDir = configPath ? path.dirname(configPath) : process.cwd(); + const resolved = path.isAbsolute(repoFile) ? repoFile : path.join(baseDir, repoFile); + if (!fs.existsSync(resolved)) return []; + const payload = JSON.parse(fs.readFileSync(resolved, 'utf8')); + return Array.isArray(payload?.repos) ? payload.repos : []; +} diff --git a/tools/service/logger.js b/tools/service/logger.js new file mode 100644 index 000000000..29700b6df --- /dev/null +++ b/tools/service/logger.js @@ -0,0 +1,42 @@ +import { getEnvConfig } from '../../src/shared/env.js'; +import { configureLogger, log, logError, logLine, updateLogContext } from '../../src/shared/progress.js'; +import { loadUserConfig } from '../dict-utils.js'; + +const normalizeLevel = (value) => { + if (typeof value === 'string' && value.trim()) return value.trim().toLowerCase(); + return 'info'; +}; + +const normalizeFormat = (value) => { + if (value === 'json' || value === 'pretty') return value; + return 'text'; +}; + +export function configureServiceLogger({ repoRoot, service, context = {} }) { + const envConfig = getEnvConfig(); + const userConfig = repoRoot ? loadUserConfig(repoRoot) : {}; + const loggingConfig = userConfig?.logging || {}; + const logFormat = normalizeFormat(envConfig.logFormat || loggingConfig.format); + const logLevel = normalizeLevel(envConfig.logLevel || loggingConfig.level); + const ringMax = Number.isFinite(Number(loggingConfig.ringMax)) + ? Math.max(1, Math.floor(Number(loggingConfig.ringMax))) + : 200; + const ringMaxBytes = Number.isFinite(Number(loggingConfig.ringMaxBytes)) + ? 
Math.max(1024, Math.floor(Number(loggingConfig.ringMaxBytes))) + : 2 * 1024 * 1024; + configureLogger({ + enabled: logFormat !== 'text', + pretty: logFormat === 'pretty', + level: logLevel, + ringMax, + ringMaxBytes, + redact: loggingConfig.redact, + context: { + service: service || 'service', + repoRoot: repoRoot || null, + ...context + } + }); + updateLogContext({ service: service || 'service' }); + return { log, logLine, logError }; +} diff --git a/tools/service/queue.js b/tools/service/queue.js new file mode 100644 index 000000000..36f7d4ad2 --- /dev/null +++ b/tools/service/queue.js @@ -0,0 +1,262 @@ +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import path from 'node:path'; + +const readJson = async (filePath, fallback) => { + try { + const raw = await fs.readFile(filePath, 'utf8'); + return JSON.parse(raw); + } catch { + return fallback; + } +}; + +const withLock = async (lockPath, worker) => { + const start = Date.now(); + while (true) { + try { + const handle = await fs.open(lockPath, 'wx'); + try { + return await worker(); + } finally { + await handle.close(); + await fs.rm(lockPath, { force: true }); + } + } catch (err) { + if (err?.code !== 'EEXIST') throw err; + if (Date.now() - start > 5000) throw new Error('Queue lock timeout.'); + await new Promise((resolve) => setTimeout(resolve, 100)); + } + } +}; + +export async function ensureQueueDir(dirPath) { + await fs.mkdir(dirPath, { recursive: true }); +} + +const ensureJobDirs = async (dirPath) => { + const logsDir = path.join(dirPath, 'logs'); + const reportsDir = path.join(dirPath, 'reports'); + await fs.mkdir(logsDir, { recursive: true }); + await fs.mkdir(reportsDir, { recursive: true }); + return { logsDir, reportsDir }; +}; + +const normalizeQueueName = (value) => { + const raw = typeof value === 'string' ? value.trim().toLowerCase() : ''; + if (!raw || raw === 'index') return null; + return raw.replace(/[^a-z0-9_-]+/g, '-'); +}; + +export function resolveQueueName(queueName, job = null) { + const normalized = normalizeQueueName(queueName); + if (normalized && normalized !== 'auto') return normalized; + if (normalized === 'auto') { + const base = job?.reason === 'embeddings' ? 'embeddings' : 'index'; + const parts = []; + if (job?.stage) parts.push(String(job.stage).toLowerCase()); + if (job?.mode && job.mode !== 'both') parts.push(String(job.mode).toLowerCase()); + return parts.length ? `${base}-${parts.join('-')}` : base; + } + return normalized; +} + +export function getQueuePaths(dirPath, queueName = null) { + const normalized = normalizeQueueName(queueName); + const suffix = normalized ? `-${normalized}` : ''; + return { + queuePath: path.join(dirPath, `queue${suffix}.json`), + lockPath: path.join(dirPath, `queue${suffix}.lock`) + }; +} + +export async function loadQueue(dirPath, queueName = null) { + const { queuePath } = getQueuePaths(dirPath, queueName); + const payload = await readJson(queuePath, { jobs: [] }); + return { + jobs: Array.isArray(payload.jobs) ? 
payload.jobs : [] + }; +} + +export async function saveQueue(dirPath, queue, queueName = null) { + const { queuePath } = getQueuePaths(dirPath, queueName); + await fs.writeFile(queuePath, JSON.stringify(queue, null, 2)); +} + +export async function enqueueJob(dirPath, job, maxQueued = null, queueName = null) { + await ensureQueueDir(dirPath); + const { logsDir, reportsDir } = await ensureJobDirs(dirPath); + const resolvedQueueName = resolveQueueName(queueName, job); + const { lockPath } = getQueuePaths(dirPath, resolvedQueueName); + return withLock(lockPath, async () => { + const queue = await loadQueue(dirPath, resolvedQueueName); + const queued = queue.jobs.filter((entry) => entry.status === 'queued'); + if (Number.isFinite(maxQueued) && queued.length >= maxQueued) { + return { ok: false, message: 'Queue is full.' }; + } + const maxRetries = Number.isFinite(Number(job.maxRetries)) && Number(job.maxRetries) >= 0 + ? Math.floor(Number(job.maxRetries)) + : null; + const next = { + id: job.id, + createdAt: job.createdAt, + status: 'queued', + repo: job.repo, + mode: job.mode, + reason: job.reason || null, + stage: job.stage || null, + args: Array.isArray(job.args) && job.args.length ? job.args : null, + attempts: 0, + maxRetries, + nextEligibleAt: null, + lastHeartbeatAt: null, + logPath: path.join(logsDir, `${job.id}.log`), + reportPath: path.join(reportsDir, `${job.id}.json`) + }; + queue.jobs.push(next); + await saveQueue(dirPath, queue, resolvedQueueName); + return { ok: true, job: next }; + }); +} + +export async function claimNextJob(dirPath, queueName = null) { + const { lockPath } = getQueuePaths(dirPath, queueName); + return withLock(lockPath, async () => { + const { logsDir, reportsDir } = await ensureJobDirs(dirPath); + const queue = await loadQueue(dirPath, queueName); + const now = Date.now(); + const job = queue.jobs.find((entry) => { + if (entry.status !== 'queued') return false; + if (!entry.nextEligibleAt) return true; + const eligibleAt = Date.parse(entry.nextEligibleAt); + return Number.isNaN(eligibleAt) || eligibleAt <= now; + }); + if (!job) return null; + if (!job.logPath) job.logPath = path.join(logsDir, `${job.id}.log`); + if (!job.reportPath) job.reportPath = path.join(reportsDir, `${job.id}.json`); + job.status = 'running'; + job.startedAt = new Date().toISOString(); + job.lastHeartbeatAt = job.startedAt; + await saveQueue(dirPath, queue, queueName); + return job; + }); +} + +export async function completeJob(dirPath, jobId, status, result, queueName = null) { + const { lockPath } = getQueuePaths(dirPath, queueName); + return withLock(lockPath, async () => { + const { reportsDir } = await ensureJobDirs(dirPath); + const queue = await loadQueue(dirPath, queueName); + const job = queue.jobs.find((entry) => entry.id === jobId); + if (!job) return null; + job.status = status; + job.finishedAt = new Date().toISOString(); + job.result = result || null; + if (Number.isFinite(result?.attempts)) { + job.attempts = Math.max(0, Math.floor(result.attempts)); + } + if (result?.error) { + job.lastError = result.error; + } + job.lastHeartbeatAt = null; + await saveQueue(dirPath, queue, queueName); + const reportPath = job.reportPath || path.join(reportsDir, `${job.id}.json`); + try { + await fs.writeFile(reportPath, JSON.stringify({ + updatedAt: new Date().toISOString(), + status: job.status, + job + }, null, 2)); + } catch {} + return job; + }); +} + +export async function touchJobHeartbeat(dirPath, jobId, queueName = null) { + const { lockPath } = getQueuePaths(dirPath, 
queueName); + return withLock(lockPath, async () => { + const queue = await loadQueue(dirPath, queueName); + const job = queue.jobs.find((entry) => entry.id === jobId); + if (!job) return null; + if (job.status !== 'running') return job; + job.lastHeartbeatAt = new Date().toISOString(); + await saveQueue(dirPath, queue, queueName); + return job; + }); +} + +const resolveStaleThresholdMs = (job, queueName) => { + const stage = typeof job?.stage === 'string' ? job.stage.toLowerCase() : ''; + if (queueName === 'embeddings' || job?.reason === 'embeddings' || stage === 'stage3') { + return 15 * 60 * 1000; + } + if (stage === 'stage2') return 10 * 60 * 1000; + return null; +}; + +const resolveRetryDelayMs = (attempts) => { + if (attempts <= 0) return 0; + if (attempts === 1) return 2 * 60 * 1000; + return 10 * 60 * 1000; +}; + +export async function requeueStaleJobs(dirPath, queueName = null, options = {}) { + const { lockPath } = getQueuePaths(dirPath, queueName); + return withLock(lockPath, async () => { + const queue = await loadQueue(dirPath, queueName); + const now = Date.now(); + const stale = []; + for (const job of queue.jobs) { + if (job.status !== 'running') continue; + const threshold = resolveStaleThresholdMs(job, queueName); + if (!threshold) continue; + const heartbeatAt = Date.parse(job.lastHeartbeatAt || job.startedAt || ''); + if (Number.isNaN(heartbeatAt)) continue; + if (now - heartbeatAt <= threshold) continue; + stale.push(job); + } + if (!stale.length) return { stale: 0, retried: 0, failed: 0 }; + let retried = 0; + let failed = 0; + for (const job of stale) { + const attempts = Number.isFinite(job.attempts) ? job.attempts : 0; + const maxRetries = Number.isFinite(job.maxRetries) + ? job.maxRetries + : (Number.isFinite(options.maxRetries) ? 
options.maxRetries : 2); + const nextAttempts = attempts + 1; + if (nextAttempts <= maxRetries) { + retried += 1; + job.status = 'queued'; + job.attempts = nextAttempts; + job.lastError = 'stale job heartbeat'; + const delayMs = resolveRetryDelayMs(nextAttempts); + job.nextEligibleAt = new Date(now + delayMs).toISOString(); + } else { + failed += 1; + job.status = 'failed'; + job.finishedAt = new Date().toISOString(); + job.result = { error: 'stale job heartbeat', attempts: nextAttempts }; + } + job.lastHeartbeatAt = null; + } + await saveQueue(dirPath, queue, queueName); + return { stale: stale.length, retried, failed }; + }); +} + +export async function queueSummary(dirPath, queueName = null) { + const { queuePath } = getQueuePaths(dirPath, queueName); + if (!fsSync.existsSync(queuePath)) { + return { total: 0, queued: 0, running: 0, done: 0, failed: 0, retries: 0 }; + } + const queue = await loadQueue(dirPath, queueName); + const summary = { total: queue.jobs.length, queued: 0, running: 0, done: 0, failed: 0, retries: 0 }; + for (const job of queue.jobs) { + if (job.status === 'queued') summary.queued += 1; + else if (job.status === 'running') summary.running += 1; + else if (job.status === 'done') summary.done += 1; + else if (job.status === 'failed') summary.failed += 1; + if (Number.isFinite(job.attempts) && job.attempts > 0) summary.retries += 1; + } + return summary; +} diff --git a/tools/service/repos.js b/tools/service/repos.js new file mode 100644 index 000000000..229e3544d --- /dev/null +++ b/tools/service/repos.js @@ -0,0 +1,41 @@ +import fs from 'node:fs/promises'; +import fsSync from 'node:fs'; +import path from 'node:path'; +import { spawnSync } from 'node:child_process'; + +const runGit = (args, cwd) => spawnSync('git', args, { cwd, encoding: 'utf8' }); + +export function resolveRepoPath(entry, baseDir) { + if (!entry?.path) return null; + return path.isAbsolute(entry.path) ? entry.path : path.join(baseDir, entry.path); +} + +export async function ensureRepo(entry, baseDir, defaultPolicy = 'pull') { + const repoPath = resolveRepoPath(entry, baseDir); + if (!repoPath) return { ok: false, message: 'Missing repo path.' }; + const branch = entry.branch || 'main'; + const policy = entry.syncPolicy || defaultPolicy; + const depth = Number.isFinite(Number(entry.cloneDepth)) ? Math.max(0, Number(entry.cloneDepth)) : 0; + + if (!fsSync.existsSync(repoPath)) { + if (!entry.url) return { ok: false, message: `Missing repo url for ${repoPath}` }; + await fs.mkdir(path.dirname(repoPath), { recursive: true }); + const cloneArgs = ['clone']; + if (depth > 0) cloneArgs.push('--depth', String(depth)); + if (branch) cloneArgs.push('--branch', branch); + cloneArgs.push(entry.url, repoPath); + const clone = runGit(cloneArgs, process.cwd()); + if (clone.status !== 0) { + return { ok: false, message: clone.stderr || clone.stdout || 'git clone failed' }; + } + return { ok: true, repoPath, action: 'clone' }; + } + + if (policy === 'none') return { ok: true, repoPath, action: 'skip' }; + const args = policy === 'fetch' ? 
['fetch', '--all', '--prune'] : ['pull', '--ff-only']; + const sync = runGit(args, repoPath); + if (sync.status !== 0) { + return { ok: false, repoPath, message: sync.stderr || sync.stdout || 'git sync failed' }; + } + return { ok: true, repoPath, action: policy }; +} diff --git a/tools/setup.js b/tools/setup.js index 581ae31a9..902cefb3f 100644 --- a/tools/setup.js +++ b/tools/setup.js @@ -2,60 +2,54 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; -import minimist from 'minimist'; +import os from 'node:os'; +import { createCli } from '../src/shared/cli.js'; import readline from 'node:readline/promises'; +import { readJsoncFile } from '../src/shared/jsonc.js'; import { getDictionaryPaths, getDictConfig, getIndexDir, getModelConfig, getRepoCacheRoot, + getRuntimeConfig, getToolingConfig, loadUserConfig, - resolveRepoRoot + resolveRuntimeEnv, + resolveRepoRoot, + resolveToolRoot } from './dict-utils.js'; import { runCommand as runCommandBase } from './cli-utils.js'; import { getVectorExtensionConfig, resolveVectorExtensionPath } from './vector-extension.js'; -const argv = minimist(process.argv.slice(2), { - boolean: [ - 'json', - 'non-interactive', - 'validate-config', - 'skip-validate', - 'skip-install', - 'skip-dicts', - 'skip-models', - 'skip-extensions', - 'skip-tooling', - 'skip-index', - 'skip-sqlite', - 'skip-artifacts', - 'with-sqlite', - 'incremental' - ], - string: ['root', 'repo', 'tooling-scope'], - alias: { ci: 'non-interactive', s: 'with-sqlite', i: 'incremental' }, - default: { - 'non-interactive': false, - 'validate-config': false, - 'skip-validate': false, - 'skip-install': false, - 'skip-dicts': false, - 'skip-models': false, - 'skip-extensions': false, - 'skip-tooling': false, - 'skip-index': false, - 'skip-sqlite': false, - 'skip-artifacts': false, - 'with-sqlite': false, - incremental: false, - json: false - } -}); +const argv = createCli({ + scriptName: 'setup', + options: { + json: { type: 'boolean', default: false }, + 'non-interactive': { type: 'boolean', default: false }, + 'validate-config': { type: 'boolean', default: false }, + 'skip-validate': { type: 'boolean', default: false }, + 'skip-install': { type: 'boolean', default: false }, + 'skip-dicts': { type: 'boolean', default: false }, + 'skip-models': { type: 'boolean', default: false }, + 'skip-extensions': { type: 'boolean', default: false }, + 'skip-tooling': { type: 'boolean', default: false }, + 'skip-index': { type: 'boolean', default: false }, + 'skip-sqlite': { type: 'boolean', default: false }, + 'skip-artifacts': { type: 'boolean', default: false }, + 'with-sqlite': { type: 'boolean', default: false }, + incremental: { type: 'boolean', default: false }, + root: { type: 'string' }, + repo: { type: 'string' }, + 'tooling-scope': { type: 'string' }, + 'heap-mb': { type: 'string' } + }, + aliases: { ci: 'non-interactive', s: 'with-sqlite', i: 'incremental' } +}).parse(); const explicitRoot = argv.root || argv.repo; const root = explicitRoot ? path.resolve(explicitRoot) : resolveRepoRoot(process.cwd()); +const toolRoot = resolveToolRoot(); const jsonOutput = argv.json === true; const nonInteractive = argv['non-interactive'] === true; const rl = nonInteractive ? 
null : readline.createInterface({ input: process.stdin, output: process.stdout }); @@ -110,8 +104,62 @@ async function promptChoice(question, choices, defaultChoice) { return match || defaultChoice; } +function formatGb(mb) { + return `${(mb / 1024).toFixed(1)} GB`; +} + +function getRecommendedHeapMb() { + const totalMb = Math.floor(os.totalmem() / (1024 * 1024)); + const recommended = Math.max(4096, Math.floor(totalMb * 0.75)); + const rounded = Math.floor(recommended / 256) * 256; + return { + totalMb, + recommendedMb: Math.max(4096, rounded) + }; +} + +async function updateRuntimeConfig(maxOldSpaceMb) { + const existing = configExists + ? readJsoncFile(configPath) + : {}; + const next = { + ...existing, + runtime: { + ...(existing.runtime || {}), + maxOldSpaceMb + } + }; + await fsPromises.writeFile(configPath, JSON.stringify(next, null, 2)); + configExists = true; + return next; +} + +async function updateProfileConfig(profileName) { + const existing = configExists + ? readJsoncFile(configPath) + : {}; + const next = { + ...existing, + profile: profileName + }; + await fsPromises.writeFile(configPath, JSON.stringify(next, null, 2)); + configExists = true; + return next; +} + +function buildRuntimeEnv(config) { + const runtimeConfig = getRuntimeConfig(root, config); + return resolveRuntimeEnv(runtimeConfig, process.env); +} + +let runtimeEnv = { ...process.env }; + function runCommand(cmd, args, options = {}) { - const spawnOptions = { cwd: root, ...options }; + const spawnOptions = { + cwd: root, + ...options, + env: { ...runtimeEnv, ...(options.env || {}) } + }; if (!('stdio' in spawnOptions)) { spawnOptions.stdio = jsonOutput ? 'pipe' : 'inherit'; } @@ -143,7 +191,7 @@ async function hasEntries(dirPath) { log(`Starting setup in ${root}`); const configPath = path.join(root, '.pairofcleats.json'); -const configExists = fs.existsSync(configPath); +let configExists = fs.existsSync(configPath); let shouldValidateConfig = argv['validate-config'] === true; if (!argv['skip-validate'] && configExists && !shouldValidateConfig && !nonInteractive) { shouldValidateConfig = await promptYesNo('Validate .pairofcleats.json now?', true); @@ -151,7 +199,7 @@ if (!argv['skip-validate'] && configExists && !shouldValidateConfig && !nonInter if (argv['skip-validate']) shouldValidateConfig = false; if (shouldValidateConfig && configExists) { - const args = [path.join(root, 'tools', 'validate-config.js'), '--config', configPath]; + const args = [path.join(toolRoot, 'tools', 'validate-config.js'), '--config', configPath]; if (jsonOutput) args.push('--json'); const result = runCommand(process.execPath, args); recordStep('config', { skipped: false, ok: result.ok, configPath }); @@ -169,13 +217,57 @@ if (shouldValidateConfig && configExists) { recordStep('config', { skipped: true, present: configExists, configPath }); } -const userConfig = loadUserConfig(root); +const profileName = typeof argv.profile === 'string' ? 
argv.profile.trim() : ''; +let userConfig = loadUserConfig(root); +if (profileName) { + await updateProfileConfig(profileName); + userConfig = loadUserConfig(root); + recordStep('profile', { configured: true, profile: profileName }); +} else { + recordStep('profile', { configured: false }); +} +runtimeEnv = buildRuntimeEnv(userConfig); const repoCacheRoot = getRepoCacheRoot(root, userConfig); const incrementalCacheRoot = path.join(repoCacheRoot, 'incremental'); const useIncremental = argv.incremental || fs.existsSync(incrementalCacheRoot); summary.incremental = useIncremental; if (useIncremental) log('Incremental indexing enabled.'); +const heapArgRaw = argv['heap-mb']; +const heapArg = Number.isFinite(Number(heapArgRaw)) ? Number(heapArgRaw) : null; +const currentHeap = Number(userConfig.runtime?.maxOldSpaceMb); +const heapConfigured = Number.isFinite(currentHeap) && currentHeap > 0; +const heapRecommendation = getRecommendedHeapMb(); +let runtimeUpdated = false; +let heapValue = heapConfigured ? currentHeap : null; + +if (Number.isFinite(heapArg) && heapArg > 0) { + userConfig = await updateRuntimeConfig(Math.floor(heapArg)); + runtimeEnv = buildRuntimeEnv(userConfig); + runtimeUpdated = true; + heapValue = Math.floor(heapArg); + log(`Configured Node heap limit at ${formatGb(heapValue)}.`); +} else if (!heapConfigured) { + const defaultYes = heapRecommendation.totalMb >= 16384; + const shouldSet = await promptYesNo( + `Set Node heap limit to ${formatGb(heapRecommendation.recommendedMb)}?`, + defaultYes + ); + if (shouldSet) { + userConfig = await updateRuntimeConfig(heapRecommendation.recommendedMb); + runtimeEnv = buildRuntimeEnv(userConfig); + runtimeUpdated = true; + heapValue = heapRecommendation.recommendedMb; + log(`Configured Node heap limit at ${formatGb(heapValue)}.`); + } +} +recordStep('runtime', { + configured: runtimeUpdated || heapConfigured, + maxOldSpaceMb: heapValue, + recommendedMb: heapRecommendation.recommendedMb, + skipped: !(runtimeUpdated || heapConfigured) +}); + const nodeModules = path.join(root, 'node_modules'); if (argv['skip-install']) { recordStep('install', { skipped: true, present: fs.existsSync(nodeModules) }); @@ -205,7 +297,7 @@ if (argv['skip-dicts']) { if (!hasDicts || needsEnglish) { const shouldDownload = await promptYesNo('Download English dictionary wordlist?', true); if (shouldDownload) { - const result = runCommand(process.execPath, [path.join(root, 'tools', 'download-dicts.js'), '--lang', 'en']); + const result = runCommand(process.execPath, [path.join(toolRoot, 'tools', 'download-dicts.js'), '--lang', 'en']); if (!result.ok) { warn('Dictionary download failed.'); recordError('dictionaries', result, 'download failed'); @@ -236,7 +328,7 @@ if (argv['skip-models']) { const shouldDownload = await promptYesNo(`Download embedding model ${modelConfig.id}?`, true); if (shouldDownload) { const result = runCommand(process.execPath, [ - path.join(root, 'tools', 'download-models.js'), + path.join(toolRoot, 'tools', 'download-models.js'), '--model', modelConfig.id, '--cache-dir', @@ -268,7 +360,7 @@ if (argv['skip-extensions']) { if (!hasExtension) { const shouldDownload = await promptYesNo('Download SQLite ANN extension?', true); if (shouldDownload) { - const result = runCommand(process.execPath, [path.join(root, 'tools', 'download-extensions.js')]); + const result = runCommand(process.execPath, [path.join(toolRoot, 'tools', 'download-extensions.js')]); if (!result.ok) { warn('Extension download failed.'); recordError('extensions', result, 'download 
failed'); @@ -301,7 +393,7 @@ if (argv['skip-tooling']) { let toolingInstalled = false; const detectResult = runCommand( process.execPath, - [path.join(root, 'tools', 'tooling-detect.js'), '--root', root, '--json'], + [path.join(toolRoot, 'tools', 'tooling-detect.js'), '--root', root, '--json'], { encoding: 'utf8', stdio: 'pipe' } ); if (detectResult.status === 0 && detectResult.stdout) { @@ -318,7 +410,7 @@ if (argv['skip-tooling']) { if (shouldInstall) { const scopeDefault = argv['tooling-scope'] || toolingConfig.installScope || 'cache'; const scope = await promptChoice('Install tooling scope', ['cache', 'global'], scopeDefault); - const installArgs = [path.join(root, 'tools', 'tooling-install.js'), '--root', root, '--scope', scope]; + const installArgs = [path.join(toolRoot, 'tools', 'tooling-install.js'), '--root', root, '--scope', scope]; if (!toolingConfig.allowGlobalFallback) installArgs.push('--no-fallback'); const result = runCommand(process.execPath, installArgs); if (!result.ok) { @@ -353,7 +445,7 @@ if (!argv['skip-artifacts']) { if (fs.existsSync(manifestPath)) { const shouldRestore = await promptYesNo('Restore CI artifacts from ci-artifacts?', true); if (shouldRestore) { - const result = runCommand(process.execPath, [path.join(root, 'tools', 'ci-restore-artifacts.js'), '--from', artifactsDir]); + const result = runCommand(process.execPath, [path.join(toolRoot, 'tools', 'ci-restore-artifacts.js'), '--from', artifactsDir]); restoredArtifacts = result.ok; if (!result.ok) { warn('CI artifact restore failed.'); @@ -369,8 +461,17 @@ recordStep('artifacts', { const codeIndexDir = getIndexDir(root, 'code', userConfig); const proseIndexDir = getIndexDir(root, 'prose', userConfig); -const codeIndexPresent = fs.existsSync(path.join(codeIndexDir, 'chunk_meta.json')); -const proseIndexPresent = fs.existsSync(path.join(proseIndexDir, 'chunk_meta.json')); +const hasChunkMeta = (indexDir) => { + const jsonPath = path.join(indexDir, 'chunk_meta.json'); + const jsonlPath = path.join(indexDir, 'chunk_meta.jsonl'); + const metaPath = path.join(indexDir, 'chunk_meta.meta.json'); + const partsDir = path.join(indexDir, 'chunk_meta.parts'); + return fs.existsSync(jsonPath) + || fs.existsSync(jsonlPath) + || (fs.existsSync(metaPath) && fs.existsSync(partsDir)); +}; +const codeIndexPresent = hasChunkMeta(codeIndexDir); +const proseIndexPresent = hasChunkMeta(proseIndexDir); let indexReady = restoredArtifacts || codeIndexPresent || proseIndexPresent; let indexBuilt = false; let indexBuildOk = true; @@ -381,7 +482,7 @@ if (!argv['skip-index'] && !restoredArtifacts) { !indexReady ); if (shouldBuild) { - const args = [path.join(root, 'build_index.js')]; + const args = [path.join(toolRoot, 'build_index.js')]; if (useIncremental) args.push('--incremental'); const result = runCommand(process.execPath, args); if (!result.ok) { @@ -397,7 +498,7 @@ if (!argv['skip-index'] && !restoredArtifacts) { let sqliteBuilt = false; let sqliteOk = true; if (!argv['skip-sqlite']) { - const sqliteConfigured = userConfig.sqlite?.use === true; + const sqliteConfigured = userConfig.sqlite?.use !== false; const sqliteDefault = argv['with-sqlite'] ? true : sqliteConfigured; const shouldBuildSqlite = argv['with-sqlite'] ? true @@ -406,7 +507,7 @@ if (!argv['skip-sqlite']) { if (!indexReady) { const shouldBuildIndex = await promptYesNo('SQLite build requires file-backed indexes. 
Build index now?', true); if (shouldBuildIndex && !argv['skip-index']) { - const args = [path.join(root, 'build_index.js')]; + const args = [path.join(toolRoot, 'build_index.js')]; if (useIncremental) args.push('--incremental'); const result = runCommand(process.execPath, args); if (!result.ok) { @@ -419,7 +520,7 @@ if (!argv['skip-sqlite']) { } } if (indexReady) { - const sqliteArgs = [path.join(root, 'tools', 'build-sqlite-index.js')]; + const sqliteArgs = [path.join(toolRoot, 'tools', 'build-sqlite-index.js')]; if (useIncremental) sqliteArgs.push('--incremental'); const result = runCommand(process.execPath, sqliteArgs); sqliteBuilt = true; @@ -451,6 +552,7 @@ recordStep('index', { if (rl) rl.close(); log('Setup complete.'); +log('Tip: run npm run index-validate to verify index artifacts.'); if (jsonOutput) { console.log(JSON.stringify(summary, null, 2)); } diff --git a/tools/shard-census.js b/tools/shard-census.js new file mode 100644 index 000000000..e1cef7664 --- /dev/null +++ b/tools/shard-census.js @@ -0,0 +1,269 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import fsPromises from 'node:fs/promises'; +import path from 'node:path'; +import os from 'node:os'; +import { createCli } from '../src/shared/cli.js'; +import { loadUserConfig, resolveToolRoot } from './dict-utils.js'; +import { buildIgnoreMatcher } from '../src/index/build/ignore.js'; +import { discoverFilesForModes } from '../src/index/build/discover.js'; +import { planShardBatches, planShards } from '../src/index/build/shards.js'; +import { countLinesForEntries } from '../src/shared/file-stats.js'; + +const argv = createCli({ + scriptName: 'shard-census', + usage: 'Usage: shard-census --repo | --bench', + options: { + bench: { type: 'boolean', default: false }, + repo: { type: 'string' } + } +}).parse(); + +const scriptRoot = resolveToolRoot(); +const benchConfigPath = path.join(scriptRoot, 'benchmarks', 'repos.json'); +const benchReposRoot = path.join(scriptRoot, 'benchmarks', 'repos'); + +const normalizeLimit = (value, fallback) => { + if (value === 0 || value === false) return null; + const parsed = Number(value); + if (Number.isFinite(parsed) && parsed > 0) return Math.floor(parsed); + return fallback; +}; + +const normalizeDepth = (value, fallback) => { + if (value === 0) return 0; + if (value === false) return null; + const parsed = Number(value); + if (Number.isFinite(parsed) && parsed > 0) return Math.floor(parsed); + return fallback; +}; + +const normalizeCapValue = (value) => { + if (value === 0 || value === false) return null; + const parsed = Number(value); + if (Number.isFinite(parsed) && parsed > 0) return Math.floor(parsed); + return null; +}; + +const normalizeCapEntry = (raw) => { + const input = raw && typeof raw === 'object' ? raw : {}; + const maxBytes = normalizeCapValue(input.maxBytes); + const maxLines = normalizeCapValue(input.maxLines); + return { maxBytes, maxLines }; +}; + +const normalizeCapsByExt = (raw) => { + const input = raw && typeof raw === 'object' ? raw : {}; + const output = {}; + for (const [key, value] of Object.entries(input)) { + const entry = normalizeCapEntry(value); + if (entry.maxBytes == null && entry.maxLines == null) continue; + const normalizedKey = key.startsWith('.') ? key.toLowerCase() : `.${key.toLowerCase()}`; + output[normalizedKey] = entry; + } + return output; +}; + +const normalizeCapsByLanguage = (raw) => { + const input = raw && typeof raw === 'object' ? 
raw : {}; + const output = {}; + for (const [key, value] of Object.entries(input)) { + const entry = normalizeCapEntry(value); + if (entry.maxBytes == null && entry.maxLines == null) continue; + output[key.toLowerCase()] = entry; + } + return output; +}; + +const resolveMaxFileBytes = (indexingConfig) => { + const maxFileBytesRaw = indexingConfig?.maxFileBytes; + const maxFileBytesParsed = Number(maxFileBytesRaw); + if (maxFileBytesRaw === false || maxFileBytesRaw === 0) { + return null; + } + if (Number.isFinite(maxFileBytesParsed) && maxFileBytesParsed > 0) { + return maxFileBytesParsed; + } + return 5 * 1024 * 1024; +}; + +const resolveFileCaps = (indexingConfig) => { + const fileCapsConfig = indexingConfig?.fileCaps || {}; + return { + default: normalizeCapEntry(fileCapsConfig.default || {}), + byExt: normalizeCapsByExt(fileCapsConfig.byExt || {}), + byLanguage: normalizeCapsByLanguage(fileCapsConfig.byLanguage || {}) + }; +}; + +const resolveShardConfig = (indexingConfig) => { + const shardsConfig = indexingConfig?.shards || {}; + return { + enabled: shardsConfig.enabled === true, + maxShards: normalizeLimit(shardsConfig.maxShards, null), + minFiles: normalizeLimit(shardsConfig.minFiles, null), + dirDepth: normalizeDepth(shardsConfig.dirDepth, 3), + maxWorkers: normalizeLimit(shardsConfig.maxWorkers, null) + }; +}; + +const loadBenchConfig = async () => { + const raw = await fsPromises.readFile(benchConfigPath, 'utf8'); + return JSON.parse(raw); +}; + +const buildBenchTasks = (config) => { + const tasks = []; + for (const [language, entry] of Object.entries(config || {})) { + const repos = entry?.repos || {}; + for (const tier of Object.keys(repos)) { + const list = Array.isArray(repos[tier]) ? repos[tier] : []; + for (const repo of list) { + tasks.push({ language, repo, tier }); + } + } + } + return tasks; +}; + +const resolveRepoPath = async (repoArg) => { + if (!repoArg) return null; + const direct = path.resolve(repoArg); + if (fs.existsSync(direct)) return direct; + if (!repoArg.includes('/')) return null; + const config = await loadBenchConfig(); + const matches = []; + for (const [language, entry] of Object.entries(config || {})) { + const repos = entry?.repos || {}; + for (const list of Object.values(repos)) { + if (!Array.isArray(list)) continue; + if (list.includes(repoArg)) { + matches.push(path.join(benchReposRoot, language, repoArg)); + } + } + } + if (matches.length === 1) return matches[0]; + return null; +}; + +const formatNumber = (value) => value.toLocaleString('en-US'); + +const censusRepo = async (repoPath, label) => { + const userConfig = loadUserConfig(repoPath); + const indexingConfig = userConfig.indexing || {}; + const maxFileBytes = resolveMaxFileBytes(indexingConfig); + const fileCaps = resolveFileCaps(indexingConfig); + const shardConfig = resolveShardConfig(indexingConfig); + const { ignoreMatcher } = await buildIgnoreMatcher({ root: repoPath, userConfig }); + + const modes = ['code', 'prose']; + const skippedByMode = { code: [], prose: [] }; + const entriesByMode = await discoverFilesForModes({ + root: repoPath, + modes, + ignoreMatcher, + skippedByMode, + maxFileBytes, + fileCaps + }); + + const concurrency = Math.max(1, Math.min(32, os.cpus().length * 2)); + console.log(`\n${label}`); + console.log(`Repo: ${repoPath}`); + for (const mode of modes) { + const entries = entriesByMode[mode] || []; + if (!entries.length) { + console.log(`Mode ${mode}: no files`); + continue; + } + const lineCounts = await countLinesForEntries(entries, { concurrency }); + 
const shards = planShards(entries, { + mode, + maxShards: shardConfig.maxShards, + minFiles: shardConfig.minFiles, + dirDepth: shardConfig.dirDepth, + lineCounts + }); + const shardStats = shards.map((shard) => { + const lines = Number.isFinite(shard.lineCount) ? shard.lineCount : 0; + return { + id: shard.id, + label: shard.label || shard.id, + files: shard.entries.length, + lines + }; + }); + shardStats.sort((a, b) => { + if (b.lines !== a.lines) return b.lines - a.lines; + if (b.files !== a.files) return b.files - a.files; + return a.label < b.label ? -1 : a.label > b.label ? 1 : 0; + }); + const totalFiles = entries.length; + const totalLines = shardStats.reduce((sum, shard) => sum + shard.lines, 0); + console.log( + `Mode ${mode}: ${shardStats.length} shards, ${formatNumber(totalFiles)} files, ${formatNumber(totalLines)} lines` + ); + for (const shard of shardStats) { + console.log( + `- ${shard.label} | files ${formatNumber(shard.files)} | lines ${formatNumber(shard.lines)}` + ); + } + if (shardConfig.maxWorkers) { + const shardBatches = planShardBatches(shards, shardConfig.maxWorkers, { + resolveWeight: (shard) => shard.costMs || shard.lineCount || shard.entries.length || 0 + }); + if (shardBatches.length) { + console.log(`Batch plan (${shardBatches.length} workers):`); + shardBatches.forEach((batch, index) => { + const batchFiles = batch.reduce((sum, shard) => sum + shard.entries.length, 0); + const batchLines = batch.reduce((sum, shard) => sum + (shard.lineCount || 0), 0); + console.log( + `- batch ${index + 1} | shards ${batch.length} | files ${formatNumber(batchFiles)} | lines ${formatNumber(batchLines)}` + ); + }); + } + } + } +}; + +const main = async () => { + if (argv.bench && argv.repo) { + console.error('Use either --bench or --repo, not both.'); + process.exit(1); + } + if (!argv.bench && !argv.repo) { + console.error('Missing --bench or --repo.'); + process.exit(1); + } + if (argv.bench) { + const config = await loadBenchConfig(); + const tasks = buildBenchTasks(config); + let missing = 0; + for (const task of tasks) { + const repoPath = path.join(benchReposRoot, task.language, task.repo); + const label = `${task.language}/${task.repo}`; + if (!fs.existsSync(repoPath)) { + console.error(`Missing ${label} at ${repoPath}`); + missing += 1; + continue; + } + await censusRepo(repoPath, label); + } + if (missing) { + console.error(`Skipped ${missing} repos (missing on disk).`); + } + return; + } + + const repoPath = await resolveRepoPath(argv.repo); + if (!repoPath || !fs.existsSync(repoPath)) { + console.error(`Repo not found: ${argv.repo}`); + process.exit(1); + } + await censusRepo(repoPath, `repo ${argv.repo}`); +}; + +main().catch((err) => { + console.error(err?.stack || err?.message || err); + process.exit(1); +}); diff --git a/tools/show-throughput.js b/tools/show-throughput.js new file mode 100644 index 000000000..d98869ce1 --- /dev/null +++ b/tools/show-throughput.js @@ -0,0 +1,268 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import path from 'node:path'; +import { color } from '../src/retrieval/cli/ansi.js'; +import { getMetricsDir, loadUserConfig } from './dict-utils.js'; + +const resultsRoot = path.join(process.cwd(), 'benchmarks', 'results'); + +const listDirs = (root) => fs.existsSync(root) + ? fs.readdirSync(root, { withFileTypes: true }).filter((entry) => entry.isDirectory()) + : []; + +const formatNumber = (value, digits = 1) => ( + Number.isFinite(value) ? value.toFixed(digits) : 'n/a' +); + +const formatCount = (value) => ( + Number.isFinite(value) ? 
value.toLocaleString() : 'n/a' +); + +const formatMs = (value) => { + if (!Number.isFinite(value)) return 'n/a'; + if (value < 1000) return `${Math.round(value)}ms`; + const seconds = value / 1000; + if (seconds < 60) return `${seconds.toFixed(1)}s`; + const minutes = Math.floor(seconds / 60); + const rem = (seconds % 60).toFixed(0); + return `${minutes}m ${rem}s`; +}; + +const formatBytesPerSec = (value) => { + if (!Number.isFinite(value)) return 'n/a'; + const mb = value / (1024 * 1024); + if (mb < 1024) return `${mb.toFixed(1)} MB/s`; + return `${(mb / 1024).toFixed(2)} GB/s`; +}; + +const mean = (values) => { + if (!values.length) return null; + return values.reduce((sum, val) => sum + val, 0) / values.length; +}; + +const collect = (items, selector) => items + .map((item) => selector(item)) + .filter((value) => Number.isFinite(value)); + +const mergeTotals = (target, entry) => { + if (!entry) return; + if (Number.isFinite(entry.files)) target.files += entry.files; + if (Number.isFinite(entry.chunks)) target.chunks += entry.chunks; + if (Number.isFinite(entry.tokens)) target.tokens += entry.tokens; + if (Number.isFinite(entry.bytes)) target.bytes += entry.bytes; + if (Number.isFinite(entry.totalMs)) target.totalMs += entry.totalMs; +}; + +const rateFromTotals = (totals, key) => { + if (!Number.isFinite(totals.totalMs) || totals.totalMs <= 0) return null; + const value = totals[key]; + if (!Number.isFinite(value)) return null; + return value / (totals.totalMs / 1000); +}; + +const sumRates = (...values) => { + let sum = 0; + let found = false; + for (const value of values) { + if (!Number.isFinite(value)) continue; + sum += value; + found = true; + } + return found ? sum : null; +}; + +const loadJson = (filePath) => { + try { + return JSON.parse(fs.readFileSync(filePath, 'utf8')); + } catch { + return null; + } +}; + +const loadFeatureMetrics = (repoRoot) => { + if (!repoRoot) return null; + const userConfig = loadUserConfig(repoRoot); + const metricsDir = getMetricsDir(repoRoot, userConfig); + const runPath = path.join(metricsDir, 'feature-metrics-run.json'); + const mergedPath = path.join(metricsDir, 'feature-metrics.json'); + return loadJson(runPath) || loadJson(mergedPath); +}; + +const collectLanguageLines = (metrics, totals) => { + if (!metrics || !metrics.modes) return; + for (const modeEntry of Object.values(metrics.modes)) { + const languages = modeEntry?.languages || {}; + for (const [language, bucket] of Object.entries(languages)) { + const lines = Number(bucket?.lines) || 0; + if (!lines) continue; + totals.set(language, (totals.get(language) || 0) + lines); + } + } +}; + +if (!fs.existsSync(resultsRoot)) { + console.error(`No benchmark results found at ${resultsRoot}`); + process.exit(1); +} + +const folders = listDirs(resultsRoot).filter((dir) => dir.name !== 'logs'); +if (!folders.length) { + console.log('No benchmark results folders found.'); + process.exit(0); +} + +const totalThroughput = { + code: { files: 0, chunks: 0, tokens: 0, bytes: 0, totalMs: 0 }, + prose: { files: 0, chunks: 0, tokens: 0, bytes: 0, totalMs: 0 } +}; +const languageTotals = new Map(); +const reposWithMetrics = new Set(); + +console.log(color.bold(color.cyan('Benchmark Performance Overview'))); +console.log(color.gray(`Root: ${resultsRoot}`)); + +for (const dir of folders) { + const folderPath = path.join(resultsRoot, dir.name); + const files = fs.readdirSync(folderPath).filter((name) => name.endsWith('.json')); + const runs = []; + const throughputs = []; + + for (const file of files) { + const 
payload = loadJson(path.join(folderPath, file)); + if (!payload) continue; + const summary = payload.summary || payload.runs?.[0] || null; + const throughput = payload.artifacts?.throughput || {}; + runs.push({ file, summary, throughput }); + throughputs.push(throughput); + mergeTotals(totalThroughput.code, throughput.code); + mergeTotals(totalThroughput.prose, throughput.prose); + const repoRoot = payload.repo?.root; + if (repoRoot && !reposWithMetrics.has(repoRoot)) { + const metrics = loadFeatureMetrics(repoRoot); + if (metrics) { + collectLanguageLines(metrics, languageTotals); + reposWithMetrics.add(repoRoot); + } + } + } + + const header = `${dir.name} (${runs.length} run${runs.length === 1 ? '' : 's'})`; + console.log(''); + console.log(color.bold(color.blue(header))); + + if (!runs.length) { + console.log(color.gray(' No benchmark JSON files found.')); + continue; + } + + const code = throughputs.map((t) => t.code).filter(Boolean); + const prose = throughputs.map((t) => t.prose).filter(Boolean); + + if (code.length) { + console.log( + ` ${color.bold('Code throughput')}: ` + + `${formatNumber(mean(collect(code, (c) => c.chunksPerSec)))} chunks/s | ` + + `${formatNumber(mean(collect(code, (c) => c.tokensPerSec)))} tokens/s | ` + + `${formatBytesPerSec(mean(collect(code, (c) => c.bytesPerSec)))} | ` + + `${formatNumber(mean(collect(code, (c) => c.filesPerSec)))} files/s` + ); + } + + if (prose.length) { + console.log( + ` ${color.bold('Prose throughput')}: ` + + `${formatNumber(mean(collect(prose, (c) => c.chunksPerSec)))} chunks/s | ` + + `${formatNumber(mean(collect(prose, (c) => c.tokensPerSec)))} tokens/s | ` + + `${formatBytesPerSec(mean(collect(prose, (c) => c.bytesPerSec)))} | ` + + `${formatNumber(mean(collect(prose, (c) => c.filesPerSec)))} files/s` + ); + } + + const summaries = runs.map((r) => r.summary).filter(Boolean); + if (summaries.length) { + const wallPerQuery = mean(collect(summaries, (s) => s.queryWallMsPerQuery)); + const wallPerSearch = mean(collect(summaries, (s) => s.queryWallMsPerSearch)); + if (wallPerQuery || wallPerSearch) { + console.log( + ` ${color.bold('Query wall time')}: ` + + `avg/query ${formatMs(wallPerQuery)} | avg/search ${formatMs(wallPerSearch)}` + ); + } + + const backendLatency = {}; + for (const summary of summaries) { + const latency = summary.latencyMs || {}; + for (const [backend, stats] of Object.entries(latency)) { + if (!backendLatency[backend]) backendLatency[backend] = { mean: [], p95: [] }; + if (Number.isFinite(stats?.mean)) backendLatency[backend].mean.push(stats.mean); + if (Number.isFinite(stats?.p95)) backendLatency[backend].p95.push(stats.p95); + } + } + const latencyLine = Object.entries(backendLatency) + .map(([backend, stats]) => ( + `${backend} ${formatNumber(mean(stats.mean))}ms (p95 ${formatNumber(mean(stats.p95))}ms)` + )) + .join(' | '); + if (latencyLine) { + console.log(` ${color.bold('Latency')}: ${latencyLine}`); + } + + const buildIndexMs = mean(collect(summaries, (s) => s.buildMs?.index)); + const buildSqliteMs = mean(collect(summaries, (s) => s.buildMs?.sqlite)); + if (buildIndexMs || buildSqliteMs) { + console.log( + ` ${color.bold('Build time')}: ` + + `index ${formatMs(buildIndexMs)} | sqlite ${formatMs(buildSqliteMs)}` + ); + } + } + + console.log(color.gray(' Runs:')); + for (const run of runs) { + const repoLabel = run.file.replace(/\.json$/, ''); + const codeStats = run.throughput?.code || {}; + const proseStats = run.throughput?.prose || {}; + const summary = run.summary || {}; + const line = [ + 
color.bold(repoLabel), + `code ${formatNumber(codeStats.chunksPerSec)} ch/s`, + `prose ${formatNumber(proseStats.chunksPerSec)} ch/s`, + `query ${formatMs(summary.queryWallMsPerQuery)}` + ].join(' | '); + console.log(` ${line}`); + } +} + +const totalFilesPerSec = sumRates( + rateFromTotals(totalThroughput.code, 'files'), + rateFromTotals(totalThroughput.prose, 'files') +); +const totalChunksPerSec = sumRates( + rateFromTotals(totalThroughput.code, 'chunks'), + rateFromTotals(totalThroughput.prose, 'chunks') +); +const totalTokensPerSec = sumRates( + rateFromTotals(totalThroughput.code, 'tokens'), + rateFromTotals(totalThroughput.prose, 'tokens') +); +const totalBytesPerSec = sumRates( + rateFromTotals(totalThroughput.code, 'bytes'), + rateFromTotals(totalThroughput.prose, 'bytes') +); + +console.log(''); +console.log(color.bold(color.green('Totals'))); +console.log( + ` ${color.bold('Files')}: ${formatNumber(totalFilesPerSec)} files/s | ` + + `${color.bold('Chunks')}: ${formatNumber(totalChunksPerSec)} chunks/s | ` + + `${color.bold('Tokens')}: ${formatNumber(totalTokensPerSec)} tokens/s | ` + + `${color.bold('Bytes')}: ${formatBytesPerSec(totalBytesPerSec)}` +); +if (languageTotals.size) { + const sortedLanguages = Array.from(languageTotals.entries()) + .sort((a, b) => b[1] - a[1]); + console.log(` ${color.bold('Lines by language')}:`); + for (const [language, lines] of sortedLanguages) { + console.log(` ${language}: ${formatCount(lines)} lines`); + } +} diff --git a/tools/structural-search.js b/tools/structural-search.js new file mode 100644 index 000000000..19d7c4320 --- /dev/null +++ b/tools/structural-search.js @@ -0,0 +1,93 @@ +#!/usr/bin/env node +import fs from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { createCli } from '../src/shared/cli.js'; +import { loadRegistry, resolvePacks } from '../src/experimental/structural/registry.js'; +import { runStructuralSearch } from '../src/experimental/structural/runner.js'; +import { writeJson, writeJsonl } from '../src/experimental/structural/io.js'; +import { loadUserConfig, resolveRepoRoot } from './dict-utils.js'; + +const argv = createCli({ + scriptName: 'structural-search', + options: { + repo: { type: 'string' }, + engine: { type: 'string' }, + pack: { type: 'array' }, + registry: { type: 'string' }, + rule: { type: 'array' }, + format: { type: 'string', default: 'jsonl' }, + out: { type: 'string' }, + json: { type: 'boolean', default: false }, + profile: { type: 'string' }, + 'list-packs': { type: 'boolean', default: false } + } +}).parse(); + +const scriptRoot = path.dirname(fileURLToPath(import.meta.url)); +const repoRoot = argv.repo ? path.resolve(argv.repo) : resolveRepoRoot(process.cwd()); +const userConfig = loadUserConfig(repoRoot, { profile: argv.profile }); +if (userConfig.profile !== 'full') { + console.error('structural-search is experimental. Run with profile=full or set PAIROFCLEATS_PROFILE=full.'); + process.exit(1); +} +const registryPath = argv.registry + ? path.resolve(argv.registry) + : path.resolve(scriptRoot, '..', 'rules', 'registry.json'); +const outputPath = argv.out ? path.resolve(argv.out) : null; +const format = argv.json ? 
'json' : (argv.format || 'jsonl'); + +const registry = loadRegistry(registryPath); +if (argv['list-packs']) { + const output = registry.packs.map((pack) => ({ + id: pack.id, + label: pack.label, + engine: pack.engine, + rules: pack.rules + })); + console.log(JSON.stringify(output, null, 2)); + process.exit(0); +} + +const packIds = (argv.pack || []).map((entry) => String(entry).trim()).filter(Boolean); +const rulePaths = (argv.rule || []).map((entry) => String(entry)).filter(Boolean); +const engineOverride = argv.engine ? String(argv.engine).trim() : ''; + +const { selectedPacks, missingPacks } = resolvePacks(registry, packIds); +if (missingPacks.length) { + console.error(`Unknown packs: ${missingPacks.join(', ')}`); +} + +if (!selectedPacks.length && !engineOverride) { + console.error('No packs selected and no engine specified.'); + process.exit(1); +} + +const resolveRulePath = (rulePath) => { + if (!rulePath) return null; + const resolved = path.isAbsolute(rulePath) + ? rulePath + : path.resolve(scriptRoot, '..', rulePath); + return fs.existsSync(resolved) ? resolved : null; +}; + +const packsToRun = selectedPacks.map((pack) => ({ + pack, + engine: pack.engine, + rules: pack.rules.map(resolveRulePath).filter(Boolean) +})); +if (engineOverride || rulePaths.length) { + packsToRun.push({ + pack: null, + engine: engineOverride, + rules: rulePaths.map(resolveRulePath).filter(Boolean) + }); +} + +const results = runStructuralSearch({ repoRoot, packsToRun }); + +if (format === 'json') { + await writeJson(results, outputPath); +} else { + writeJsonl(results, outputPath); +} diff --git a/tools/tooling-detect.js b/tools/tooling-detect.js index ea57ffe5d..23933cfcb 100644 --- a/tools/tooling-detect.js +++ b/tools/tooling-detect.js @@ -1,19 +1,26 @@ #!/usr/bin/env node -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; +import path from 'node:path'; import { buildToolingReport, normalizeLanguageList } from './tooling-utils.js'; import { resolveRepoRoot } from './dict-utils.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['json'], - string: ['root', 'repo', 'languages'], - default: { json: false } -}); +const argv = createCli({ + scriptName: 'tooling-detect', + options: { + json: { type: 'boolean', default: false }, + root: { type: 'string' }, + repo: { type: 'string' }, + languages: { type: 'string' } + } +}).parse(); const explicitRoot = argv.root || argv.repo; const root = explicitRoot ? 
path.resolve(explicitRoot) : resolveRepoRoot(process.cwd()); const languageOverride = normalizeLanguageList(argv.languages); -const report = await buildToolingReport(root, languageOverride); +const report = await buildToolingReport(root, languageOverride, { + skipScan: languageOverride.length > 0 +}); if (argv.json) { console.log(JSON.stringify(report, null, 2)); diff --git a/tools/tooling-install.js b/tools/tooling-install.js index 5749b87e8..ade0315f0 100644 --- a/tools/tooling-install.js +++ b/tools/tooling-install.js @@ -1,14 +1,23 @@ #!/usr/bin/env node -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; +import path from 'node:path'; import { spawnSync } from 'node:child_process'; import { buildToolingReport, detectTool, normalizeLanguageList, resolveToolsById, resolveToolsForLanguages, selectInstallPlan } from './tooling-utils.js'; import { getToolingConfig, resolveRepoRoot } from './dict-utils.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['json', 'dry-run', 'no-fallback'], - string: ['root', 'repo', 'scope', 'languages', 'tools'], - default: { 'dry-run': false, json: false, 'no-fallback': false } -}); +const argv = createCli({ + scriptName: 'tooling-install', + options: { + json: { type: 'boolean', default: false }, + 'dry-run': { type: 'boolean', default: false }, + 'no-fallback': { type: 'boolean', default: false }, + root: { type: 'string' }, + repo: { type: 'string' }, + scope: { type: 'string' }, + languages: { type: 'string' }, + tools: { type: 'string' } + } +}).parse(); const explicitRoot = argv.root || argv.repo; const root = explicitRoot ? path.resolve(explicitRoot) : resolveRepoRoot(process.cwd()); @@ -18,11 +27,13 @@ const allowFallback = argv['no-fallback'] ? false : toolingConfig.allowGlobalFal const languageOverride = normalizeLanguageList(argv.languages); const toolOverride = normalizeLanguageList(argv.tools); -const report = await buildToolingReport(root, languageOverride); +const report = toolOverride.length + ? { languages: {}, formats: {} } + : await buildToolingReport(root, languageOverride, { skipScan: languageOverride.length > 0 }); const languageList = languageOverride.length ? languageOverride : Object.keys(report.languages || {}); const tools = toolOverride.length - ? resolveToolsById(toolOverride, toolingConfig.dir, root) - : resolveToolsForLanguages(languageList, toolingConfig.dir, root); + ? 
resolveToolsById(toolOverride, toolingConfig.dir, root, toolingConfig) + : resolveToolsForLanguages(languageList, toolingConfig.dir, root, toolingConfig); const actions = []; const results = []; diff --git a/tools/tooling-utils.js b/tools/tooling-utils.js index f5ee5b450..766670b53 100644 --- a/tools/tooling-utils.js +++ b/tools/tooling-utils.js @@ -1,20 +1,21 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; -import { spawnSync } from 'node:child_process'; -import { SKIP_DIRS, SKIP_FILES } from '../src/indexer/constants.js'; +import { execaSync } from 'execa'; +import { LOCK_FILES, MANIFEST_FILES, SKIP_DIRS, SKIP_FILES } from '../src/index/constants.js'; import { getToolingConfig } from './dict-utils.js'; const LANGUAGE_EXTENSIONS = { javascript: ['.js', '.mjs', '.cjs'], typescript: ['.ts', '.tsx', '.mts', '.cts'], - python: ['.py'], + python: ['.py', '.pyi'], c: ['.c', '.h'], cpp: ['.cc', '.cpp', '.hpp', '.hh'], objc: ['.m', '.mm'], rust: ['.rs'], go: ['.go'], java: ['.java'], + swift: ['.swift'], shell: ['.sh', '.bash', '.zsh', '.ksh'], csharp: ['.cs'], kotlin: ['.kt', '.kts'], @@ -36,19 +37,34 @@ const FORMAT_EXTENSIONS = { const FORMAT_FILENAMES = { dockerfile: ['dockerfile'], - makefile: ['makefile'] + makefile: ['makefile', 'gnumakefile'], + manifest: Array.from(MANIFEST_FILES), + lockfile: Array.from(LOCK_FILES) +}; + +const FORMAT_FILENAME_PREFIXES = { + dockerfile: ['dockerfile.'], + makefile: ['makefile.'] }; const TOOL_DOCS = { tsserver: 'https://www.typescriptlang.org/', + 'typescript-language-server': 'https://github.com/typescript-language-server/typescript-language-server', clangd: 'https://clangd.llvm.org/installation', 'rust-analyzer': 'https://rust-analyzer.github.io/', gopls: 'https://pkg.go.dev/golang.org/x/tools/gopls', jdtls: 'https://github.com/eclipse-jdtls/eclipse.jdt.ls', + 'sourcekit-lsp': 'https://www.swift.org/download/', 'kotlin-language-server': 'https://github.com/fwcd/kotlin-language-server', + 'kotlin-lsp': 'https://kotlinlang.org/docs/', + pyright: 'https://github.com/microsoft/pyright', omnisharp: 'https://github.com/OmniSharp/omnisharp-roslyn', + 'csharp-ls': 'https://github.com/razzmatazz/csharp-language-server', + 'ruby-lsp': 'https://shopify.github.io/ruby-lsp/', solargraph: 'https://solargraph.org/', phpactor: 'https://phpactor.readthedocs.io/', + intelephense: 'https://github.com/bmewburn/intelephense-docs', + 'bash-language-server': 'https://github.com/bash-lsp/bash-language-server', 'lua-language-server': 'https://github.com/LuaLS/lua-language-server', sqls: 'https://github.com/lighttiger2505/sqls' }; @@ -72,14 +88,18 @@ function findBinaryInDirs(name, dirs) { } function canRun(cmd, args = ['--version']) { - const result = spawnSync(cmd, args, { encoding: 'utf8' }); - return result.status === 0; + try { + const result = execaSync(cmd, args, { encoding: 'utf8', stdio: 'ignore', reject: false }); + return result.exitCode === 0; + } catch { + return false; + } } async function scanRepo(root) { const extCounts = new Map(); - const filePaths = []; const lowerNames = new Set(); + let workflowCount = 0; const visit = async (dir) => { let entries; try { @@ -98,12 +118,16 @@ async function scanRepo(root) { if (SKIP_FILES.has(entry.name)) continue; const ext = path.extname(entry.name).toLowerCase(); if (ext) extCounts.set(ext, (extCounts.get(ext) || 0) + 1); - filePaths.push(abs); lowerNames.add(entry.name.toLowerCase()); + const normalized = abs.replace(/\\/g, '/').toLowerCase(); + if 
(normalized.includes('/.github/workflows/') + && (normalized.endsWith('.yml') || normalized.endsWith('.yaml'))) { + workflowCount += 1; + } } }; await visit(root); - return { extCounts, filePaths, lowerNames }; + return { extCounts, lowerNames, workflowCount }; } function buildLangHits(extCounts) { @@ -117,8 +141,16 @@ function buildLangHits(extCounts) { return hits; } -function buildFormatHits(extCounts, lowerNames, filePaths) { +function buildFormatHits(extCounts, lowerNames, workflowCount) { const hits = {}; + const hasPrefixName = (prefix) => { + const key = prefix.toLowerCase(); + if (lowerNames.has(key)) return true; + for (const name of lowerNames) { + if (name.startsWith(key)) return true; + } + return false; + }; for (const [format, exts] of Object.entries(FORMAT_EXTENSIONS)) { const matched = exts.filter((ext) => extCounts.has(ext)); if (!matched.length) continue; @@ -126,25 +158,23 @@ function buildFormatHits(extCounts, lowerNames, filePaths) { hits[format] = { extensions: matched, files: count }; } for (const [format, names] of Object.entries(FORMAT_FILENAMES)) { - if (names.some((name) => lowerNames.has(name))) { + const prefixes = FORMAT_FILENAME_PREFIXES[format] || []; + const hasExact = names.some((name) => lowerNames.has(name)); + const hasPrefix = prefixes.some((prefix) => hasPrefixName(prefix)); + if (hasExact || hasPrefix) { hits[format] = { filenames: names, files: names.length }; } } - const ghWorkflows = filePaths.filter((filePath) => { - const normalized = filePath.replace(/\\/g, '/').toLowerCase(); - if (!normalized.includes('/.github/workflows/')) return false; - return normalized.endsWith('.yml') || normalized.endsWith('.yaml'); - }); - if (ghWorkflows.length) { - hits['github-actions'] = { extensions: ['.yml', '.yaml'], files: ghWorkflows.length }; + if (workflowCount) { + hits['github-actions'] = { extensions: ['.yml', '.yaml'], files: workflowCount }; } return hits; } export async function detectRepoLanguages(root) { - const { extCounts, filePaths, lowerNames } = await scanRepo(root); + const { extCounts, lowerNames, workflowCount } = await scanRepo(root); const languages = buildLangHits(extCounts); - const formats = buildFormatHits(extCounts, lowerNames, filePaths); + const formats = buildFormatHits(extCounts, lowerNames, workflowCount); return { languages, formats, extCounts }; } @@ -170,6 +200,17 @@ export function getToolingRegistry(toolingRoot, repoRoot) { }, docs: TOOL_DOCS.tsserver }, + { + id: 'typescript-language-server', + label: 'TypeScript language server', + languages: ['typescript'], + detect: { cmd: 'typescript-language-server', args: ['--version'], binDirs: [repoNodeBin, nodeBin] }, + install: { + cache: { cmd: 'npm', args: ['install', '--prefix', nodeDir, 'typescript-language-server'] }, + user: { cmd: 'npm', args: ['install', '-g', 'typescript-language-server'] } + }, + docs: TOOL_DOCS['typescript-language-server'] + }, { id: 'clangd', label: 'clangd', @@ -180,6 +221,27 @@ export function getToolingRegistry(toolingRoot, repoRoot) { }, docs: TOOL_DOCS.clangd }, + { + id: 'sourcekit-lsp', + label: 'SourceKit-LSP', + languages: ['swift'], + detect: { cmd: 'sourcekit-lsp', args: ['--help'], binDirs: [] }, + install: { + manual: true + }, + docs: TOOL_DOCS['sourcekit-lsp'] + }, + { + id: 'pyright', + label: 'Pyright', + languages: ['python'], + detect: { cmd: 'pyright', args: ['--version'], binDirs: [repoNodeBin, nodeBin] }, + install: { + cache: { cmd: 'npm', args: ['install', '--prefix', nodeDir, 'pyright'] }, + user: { cmd: 'npm', args: 
['install', '-g', 'pyright'] } + }, + docs: TOOL_DOCS.pyright + }, { id: 'rust-analyzer', label: 'rust-analyzer', @@ -221,6 +283,16 @@ export function getToolingRegistry(toolingRoot, repoRoot) { }, docs: TOOL_DOCS['kotlin-language-server'] }, + { + id: 'kotlin-lsp', + label: 'Kotlin LSP', + languages: ['kotlin'], + detect: { cmd: 'kotlin-lsp', args: ['--version'], binDirs: [] }, + install: { + manual: true + }, + docs: TOOL_DOCS['kotlin-lsp'] + }, { id: 'omnisharp', label: 'OmniSharp', @@ -232,6 +304,28 @@ export function getToolingRegistry(toolingRoot, repoRoot) { }, docs: TOOL_DOCS.omnisharp }, + { + id: 'csharp-ls', + label: 'C# LSP (Roslyn)', + languages: ['csharp'], + detect: { cmd: 'csharp-ls', args: ['--version'], binDirs: [dotnetDir] }, + install: { + cache: { cmd: 'dotnet', args: ['tool', 'install', '--tool-path', dotnetDir, 'csharp-ls'], requires: 'dotnet' }, + user: { cmd: 'dotnet', args: ['tool', 'install', '-g', 'csharp-ls'], requires: 'dotnet' } + }, + docs: TOOL_DOCS['csharp-ls'] + }, + { + id: 'ruby-lsp', + label: 'Ruby LSP', + languages: ['ruby'], + detect: { cmd: 'ruby-lsp', args: ['--version'], binDirs: [binDir] }, + install: { + cache: { cmd: 'gem', args: ['install', '-i', gemsDir, '-n', binDir, 'ruby-lsp'], requires: 'gem' }, + user: { cmd: 'gem', args: ['install', 'ruby-lsp'], requires: 'gem' } + }, + docs: TOOL_DOCS['ruby-lsp'] + }, { id: 'solargraph', label: 'Solargraph', @@ -254,6 +348,17 @@ export function getToolingRegistry(toolingRoot, repoRoot) { }, docs: TOOL_DOCS.phpactor }, + { + id: 'intelephense', + label: 'Intelephense', + languages: ['php'], + detect: { cmd: 'intelephense', args: ['--version'], binDirs: [repoNodeBin, nodeBin] }, + install: { + cache: { cmd: 'npm', args: ['install', '--prefix', nodeDir, 'intelephense'] }, + user: { cmd: 'npm', args: ['install', '-g', 'intelephense'] } + }, + docs: TOOL_DOCS.intelephense + }, { id: 'lua-language-server', label: 'lua-language-server', @@ -264,6 +369,17 @@ export function getToolingRegistry(toolingRoot, repoRoot) { }, docs: TOOL_DOCS['lua-language-server'] }, + { + id: 'bash-language-server', + label: 'bash-language-server', + languages: ['shell'], + detect: { cmd: 'bash-language-server', args: ['--version'], binDirs: [repoNodeBin, nodeBin] }, + install: { + cache: { cmd: 'npm', args: ['install', '--prefix', nodeDir, 'bash-language-server'] }, + user: { cmd: 'npm', args: ['install', '-g', 'bash-language-server'] } + }, + docs: TOOL_DOCS['bash-language-server'] + }, { id: 'sqls', label: 'sqls', @@ -278,16 +394,33 @@ export function getToolingRegistry(toolingRoot, repoRoot) { ]; } -export function resolveToolsForLanguages(languages, toolingRoot, repoRoot) { +function filterToolsByConfig(tools, toolingConfig) { + const enabled = Array.isArray(toolingConfig?.enabledTools) ? toolingConfig.enabledTools : []; + const disabled = Array.isArray(toolingConfig?.disabledTools) ? 
toolingConfig.disabledTools : []; + let filtered = tools; + if (enabled.length) { + const enabledSet = new Set(enabled); + filtered = filtered.filter((tool) => enabledSet.has(tool.id)); + } + if (disabled.length) { + const disabledSet = new Set(disabled); + filtered = filtered.filter((tool) => !disabledSet.has(tool.id)); + } + return filtered; +} + +export function resolveToolsForLanguages(languages, toolingRoot, repoRoot, toolingConfig = null) { const languageSet = new Set(languages); const registry = getToolingRegistry(toolingRoot, repoRoot); - return registry.filter((tool) => tool.languages.some((lang) => languageSet.has(lang))); + const matched = registry.filter((tool) => tool.languages.some((lang) => languageSet.has(lang))); + return filterToolsByConfig(matched, toolingConfig); } -export function resolveToolsById(ids, toolingRoot, repoRoot) { +export function resolveToolsById(ids, toolingRoot, repoRoot, toolingConfig = null) { const idSet = new Set(ids); const registry = getToolingRegistry(toolingRoot, repoRoot); - return registry.filter((tool) => idSet.has(tool.id)); + const matched = registry.filter((tool) => idSet.has(tool.id)); + return filterToolsByConfig(matched, toolingConfig); } export function detectTool(tool) { @@ -317,13 +450,22 @@ export function hasCommand(cmd) { return canRun(cmd, ['--version']); } -export async function buildToolingReport(root, languageOverride = null) { +export async function buildToolingReport(root, languageOverride = null, options = {}) { const toolingConfig = getToolingConfig(root); - const { languages, formats } = await detectRepoLanguages(root); + const skipScan = options.skipScan === true; + const detected = skipScan ? { languages: {}, formats: {} } : await detectRepoLanguages(root); + const languages = detected.languages || {}; + const formats = detected.formats || {}; const languageList = languageOverride && languageOverride.length ? languageOverride : Object.keys(languages); - const tools = resolveToolsForLanguages(languageList, toolingConfig.dir, root).map((tool) => { + const languageMap = (languageOverride && languageOverride.length && skipScan) + ? 
languageOverride.reduce((acc, lang) => { + acc[lang] = { extensions: [], files: 0, override: true }; + return acc; + }, {}) + : languages; + const tools = resolveToolsForLanguages(languageList, toolingConfig.dir, root, toolingConfig).map((tool) => { const status = detectTool(tool); return { id: tool.id, @@ -339,7 +481,7 @@ export async function buildToolingReport(root, languageOverride = null) { return { root, toolingRoot: toolingConfig.dir, - languages, + languages: languageMap, formats, tools }; diff --git a/tools/triage/context-pack.js b/tools/triage/context-pack.js index 2db6bf598..3eb89efab 100644 --- a/tools/triage/context-pack.js +++ b/tools/triage/context-pack.js @@ -1,15 +1,20 @@ #!/usr/bin/env node import fsPromises from 'node:fs/promises'; import path from 'node:path'; -import { spawnSync } from 'node:child_process'; -import { fileURLToPath } from 'node:url'; -import minimist from 'minimist'; -import { getRepoCacheRoot, getTriageConfig, loadUserConfig, resolveRepoRoot } from '../dict-utils.js'; - -const argv = minimist(process.argv.slice(2), { - boolean: ['stub-embeddings', 'ann'], - string: ['repo', 'record', 'out'] -}); +import { execaSync } from 'execa'; +import { createCli } from '../../src/shared/cli.js'; +import { getRepoCacheRoot, getRuntimeConfig, getTriageConfig, loadUserConfig, resolveRepoRoot, resolveRuntimeEnv, resolveToolRoot } from '../dict-utils.js'; + +const argv = createCli({ + scriptName: 'triage-context-pack', + options: { + 'stub-embeddings': { type: 'boolean', default: false }, + ann: { type: 'boolean' }, + repo: { type: 'string' }, + record: { type: 'string' }, + out: { type: 'string' } + } +}).parse(); const rawArgs = process.argv.slice(2); const annFlagPresent = rawArgs.includes('--ann') || rawArgs.includes('--no-ann'); @@ -21,6 +26,8 @@ if (!recordId) { } const userConfig = loadUserConfig(repoRoot); +const runtimeConfig = getRuntimeConfig(repoRoot, userConfig); +const baseEnv = resolveRuntimeEnv(runtimeConfig, process.env); const triageConfig = getTriageConfig(repoRoot, userConfig); const repoCacheRoot = getRepoCacheRoot(repoRoot, userConfig); const recordsDir = triageConfig.recordsDir; @@ -225,7 +232,7 @@ async function loadRecord(recordsDir, recordId) { } function runSearchJson({ repoRoot, query, mode, metaFilters, top }) { - const scriptRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..'); + const scriptRoot = resolveToolRoot(); const searchPath = path.join(scriptRoot, 'search.js'); const args = [searchPath, query, '--mode', mode, '--json', '--top', String(top), '--repo', repoRoot]; if (Array.isArray(metaFilters)) { @@ -235,10 +242,10 @@ function runSearchJson({ repoRoot, query, mode, metaFilters, top }) { } if (annFlagPresent && argv.ann === true) args.push('--ann'); if (annFlagPresent && argv.ann === false) args.push('--no-ann'); - const env = { ...process.env }; + const env = { ...baseEnv }; if (argv['stub-embeddings']) env.PAIROFCLEATS_EMBEDDINGS = 'stub'; - const result = spawnSync(process.execPath, args, { cwd: repoRoot, env, encoding: 'utf8' }); - if (result.status !== 0) { + const result = execaSync(process.execPath, args, { cwd: repoRoot, env, encoding: 'utf8', reject: false }); + if (result.exitCode !== 0) { return { ok: false, error: result.stderr || result.stdout || 'search failed', payload: null }; } try { diff --git a/tools/triage/decision.js b/tools/triage/decision.js index f9caaaa92..9ac1c4768 100644 --- a/tools/triage/decision.js +++ b/tools/triage/decision.js @@ -1,16 +1,28 @@ #!/usr/bin/env node import 
fsPromises from 'node:fs/promises'; import path from 'node:path'; -import minimist from 'minimist'; +import { createCli } from '../../src/shared/cli.js'; import { getTriageConfig, loadUserConfig, resolveRepoRoot } from '../dict-utils.js'; -import { buildRecordId } from '../../src/triage/record-utils.js'; -import { applyRoutingMeta } from '../../src/triage/normalize/helpers.js'; -import { renderRecordMarkdown } from '../../src/triage/render.js'; +import { buildRecordId } from '../../src/integrations/triage/record-utils.js'; +import { applyRoutingMeta } from '../../src/integrations/triage/normalize/helpers.js'; +import { renderRecordMarkdown } from '../../src/integrations/triage/render.js'; -const argv = minimist(process.argv.slice(2), { - string: ['repo', 'finding', 'status', 'justification', 'reviewer', 'expires', 'meta', 'code', 'evidence'], - alias: { r: 'repo' } -}); +const argv = createCli({ + scriptName: 'triage-decision', + options: { + repo: { type: 'string' }, + finding: { type: 'string' }, + record: { type: 'string' }, + status: { type: 'string' }, + justification: { type: 'string' }, + reviewer: { type: 'string' }, + expires: { type: 'string' }, + meta: { type: 'string', array: true }, + code: { type: 'string', array: true }, + evidence: { type: 'string', array: true } + }, + aliases: { r: 'repo' } +}).parse(); const repoRoot = argv.repo ? path.resolve(argv.repo) : resolveRepoRoot(process.cwd()); const findingId = argv.finding || argv.record; diff --git a/tools/triage/ingest.js b/tools/triage/ingest.js index f10a37ac7..bdc98f4ed 100644 --- a/tools/triage/ingest.js +++ b/tools/triage/ingest.js @@ -1,24 +1,31 @@ #!/usr/bin/env node import fsPromises from 'node:fs/promises'; import path from 'node:path'; -import { spawnSync } from 'node:child_process'; -import { fileURLToPath } from 'node:url'; -import minimist from 'minimist'; -import { getTriageConfig, loadUserConfig, resolveRepoRoot } from '../dict-utils.js'; -import { normalizeDependabot } from '../../src/triage/normalize/dependabot.js'; -import { normalizeAwsInspector } from '../../src/triage/normalize/aws-inspector.js'; -import { normalizeGeneric } from '../../src/triage/normalize/generic.js'; -import { renderRecordMarkdown } from '../../src/triage/render.js'; +import { execaSync } from 'execa'; +import { createCli } from '../../src/shared/cli.js'; +import { getRuntimeConfig, getTriageConfig, loadUserConfig, resolveRepoRoot, resolveRuntimeEnv, resolveToolRoot } from '../dict-utils.js'; +import { normalizeDependabot } from '../../src/integrations/triage/normalize/dependabot.js'; +import { normalizeAwsInspector } from '../../src/integrations/triage/normalize/aws-inspector.js'; +import { normalizeGeneric } from '../../src/integrations/triage/normalize/generic.js'; +import { renderRecordMarkdown } from '../../src/integrations/triage/render.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['build-index', 'incremental', 'stub-embeddings'], - string: ['repo', 'source', 'in', 'meta'], - alias: { i: 'in' } -}); +const argv = createCli({ + scriptName: 'triage-ingest', + options: { + 'build-index': { type: 'boolean', default: false }, + incremental: { type: 'boolean', default: false }, + 'stub-embeddings': { type: 'boolean', default: false }, + repo: { type: 'string' }, + source: { type: 'string' }, + in: { type: 'string' }, + meta: { type: 'string', array: true } + }, + aliases: { i: 'in' } +}).parse(); const repoRoot = argv.repo ? 
path.resolve(argv.repo) : resolveRepoRoot(process.cwd()); const source = normalizeSource(argv.source); -const inputPath = argv.in ? path.resolve(argv.in) : null; +const inputPath = argv.in ? path.resolve(repoRoot, argv.in) : null; if (!source || !inputPath) { console.error('usage: node tools/triage/ingest.js --source dependabot|aws_inspector|generic --in <file> [--repo <dir>] [--meta key=value] [--build-index]'); @@ -26,6 +33,8 @@ if (!source || !inputPath) { } const userConfig = loadUserConfig(repoRoot); +const runtimeConfig = getRuntimeConfig(repoRoot, userConfig); +const baseEnv = resolveRuntimeEnv(runtimeConfig, process.env); const triageConfig = getTriageConfig(repoRoot, userConfig); const meta = parseMeta(argv.meta); @@ -77,12 +86,14 @@ for (let index = 0; index < rawEntries.length; index += 1) { } if (argv['build-index']) { - const scriptRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..'); + const scriptRoot = resolveToolRoot(); const args = [path.join(scriptRoot, 'build_index.js'), '--mode', 'records', '--repo', repoRoot]; if (argv.incremental) args.push('--incremental'); if (argv['stub-embeddings']) args.push('--stub-embeddings'); - const result = spawnSync(process.execPath, args, { cwd: repoRoot, stdio: 'inherit' }); - if (result.status !== 0) process.exit(result.status ?? 1); + const env = { ...baseEnv }; + if (argv['stub-embeddings']) env.PAIROFCLEATS_EMBEDDINGS = 'stub'; + const result = execaSync(process.execPath, args, { cwd: repoRoot, stdio: 'inherit', env, reject: false }); + if (result.exitCode !== 0) process.exit(result.exitCode ?? 1); } console.log(JSON.stringify(results, null, 2)); diff --git a/tools/uninstall.js b/tools/uninstall.js index 7db450c85..c01416882 100644 --- a/tools/uninstall.js +++ b/tools/uninstall.js @@ -3,23 +3,28 @@ import fs from 'node:fs'; import fsPromises from 'node:fs/promises'; import path from 'node:path'; import readline from 'node:readline/promises'; -import minimist from 'minimist'; +import { createCli } from '../src/shared/cli.js'; +import { getEnvConfig } from '../src/shared/env.js'; import { getCacheRoot, getDictConfig, getExtensionsDir, getModelsDir, loadUserConfig, resolveRepoRoot } from './dict-utils.js'; import { isInside, isRootPath } from './path-utils.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['yes', 'dry-run'], - string: ['repo'], - default: { yes: false, 'dry-run': false } -}); +const argv = createCli({ + scriptName: 'uninstall', + options: { + yes: { type: 'boolean', default: false }, + 'dry-run': { type: 'boolean', default: false }, + repo: { type: 'string' } + } +}).parse(); const rootArg = argv.repo ?
path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); const userConfig = loadUserConfig(root); const dictConfig = getDictConfig(root, userConfig); +const envConfig = getEnvConfig(); const defaultCacheRoot = getCacheRoot(); -const configuredCacheRoot = (userConfig.cache && userConfig.cache.root) || process.env.PAIROFCLEATS_CACHE_ROOT || defaultCacheRoot; -const envCacheRoot = process.env.PAIROFCLEATS_CACHE_ROOT || null; +const configuredCacheRoot = (userConfig.cache && userConfig.cache.root) || envConfig.cacheRoot || defaultCacheRoot; +const envCacheRoot = envConfig.cacheRoot || null; const modelsDir = getModelsDir(root, userConfig); const extensionsDir = getExtensionsDir(root, userConfig); diff --git a/tools/validate-config.js b/tools/validate-config.js index aaa877600..d85294e34 100644 --- a/tools/validate-config.js +++ b/tools/validate-config.js @@ -1,21 +1,24 @@ #!/usr/bin/env node import fs from 'node:fs'; import path from 'node:path'; -import minimist from 'minimist'; -import { fileURLToPath } from 'node:url'; -import { resolveRepoRoot } from './dict-utils.js'; +import { createCli } from '../src/shared/cli.js'; +import { readJsoncFile } from '../src/shared/jsonc.js'; +import { resolveRepoRoot, resolveToolRoot } from './dict-utils.js'; import { validateConfig } from '../src/config/validate.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['json'], - string: ['repo', 'config'], - default: { json: false } -}); +const argv = createCli({ + scriptName: 'config-validate', + options: { + json: { type: 'boolean', default: false }, + repo: { type: 'string' }, + config: { type: 'string' } + } +}).parse(); const repoArg = argv.repo ? path.resolve(argv.repo) : null; const repoRoot = repoArg || resolveRepoRoot(process.cwd()); const configPath = argv.config ? path.resolve(argv.config) : path.join(repoRoot, '.pairofcleats.json'); -const toolRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..'); +const toolRoot = resolveToolRoot(); const schemaPath = path.join(toolRoot, 'docs', 'config-schema.json'); if (!fs.existsSync(schemaPath)) { @@ -35,7 +38,7 @@ if (!fs.existsSync(configPath)) { let config; try { - config = JSON.parse(fs.readFileSync(configPath, 'utf8')); + config = readJsoncFile(configPath); } catch (err) { const message = `Failed to parse config: ${err?.message || err}`; if (argv.json) { @@ -58,6 +61,27 @@ if (!config || typeof config !== 'object' || Array.isArray(config)) { const schema = JSON.parse(fs.readFileSync(schemaPath, 'utf8')); const result = validateConfig(schema, config); +const profileErrors = []; +const profileName = typeof config.profile === 'string' ? 
config.profile.trim() : ''; +if (profileName) { + const profilePath = path.join(toolRoot, 'profiles', `${profileName}.json`); + if (!fs.existsSync(profilePath)) { + profileErrors.push(`Profile not found: ${profilePath}`); + } else { + try { + const profileRaw = JSON.parse(fs.readFileSync(profilePath, 'utf8')); + if (!profileRaw || typeof profileRaw !== 'object' || Array.isArray(profileRaw)) { + profileErrors.push(`Profile must be a JSON object: ${profilePath}`); + } + } catch (err) { + profileErrors.push(`Failed to parse profile ${profilePath}: ${err?.message || err}`); + } + } +} +if (profileErrors.length) { + result.ok = false; + result.errors = result.errors.concat(profileErrors); +} if (argv.json) { console.log(JSON.stringify({ ok: result.ok, found: true, configPath, errors: result.errors }, null, 2)); } else if (result.ok) { diff --git a/tools/vector-extension.js b/tools/vector-extension.js index 100732f72..48865f707 100644 --- a/tools/vector-extension.js +++ b/tools/vector-extension.js @@ -1,12 +1,17 @@ import fs from 'node:fs'; import path from 'node:path'; import { getExtensionsDir, loadUserConfig } from './dict-utils.js'; +import { getEnvConfig } from '../src/shared/env.js'; +import { incFallback } from '../src/shared/metrics.js'; const DEFAULT_PROVIDER = 'sqlite-vec'; const DEFAULT_MODULE = 'vec0'; const DEFAULT_TABLE = 'dense_vectors_ann'; const DEFAULT_COLUMN = 'embedding'; const DEFAULT_ENCODING = 'float32'; +const SQLITE_IN_LIMIT = 900; +const IDENTIFIER_RE = /^[A-Za-z_][A-Za-z0-9_]*$/; +const OPTION_RE = /^([A-Za-z_][A-Za-z0-9_]*)(?:\s*=\s*([A-Za-z0-9_.-]+))?$/; const PROVIDERS = { 'sqlite-vec': { @@ -17,6 +22,61 @@ const PROVIDERS = { } }; +const warningCache = new Set(); + +function warnOnce(key, message) { + if (warningCache.has(key)) return; + warningCache.add(key); + console.warn(message); +} + +function isSafeIdentifier(value) { + return IDENTIFIER_RE.test(String(value || '')); +} + +function normalizeOptionValue(value) { + return String(value || '').replace(/\\/g, '/').trim(); +} + +function parseVectorOptions(raw) { + if (!raw) return { ok: true, options: '' }; + const trimmed = normalizeOptionValue(raw); + if (!trimmed) return { ok: true, options: '' }; + const parts = trimmed.split(',').map((part) => part.trim()).filter(Boolean); + const normalized = []; + for (const part of parts) { + const match = OPTION_RE.exec(part); + if (!match) { + return { ok: false, reason: 'invalid vector extension options' }; + } + const key = match[1]; + const value = match[2]; + normalized.push(value ? `${key}=${value}` : key); + } + return { ok: true, options: normalized.join(', ') }; +} + +function sanitizeVectorExtensionConfig(config) { + const issues = []; + if (!isSafeIdentifier(config.module)) issues.push('module'); + if (!isSafeIdentifier(config.table)) issues.push('table'); + if (!isSafeIdentifier(config.column)) issues.push('column'); + const parsedOptions = parseVectorOptions(config.options); + if (!parsedOptions.ok) issues.push('options'); + + const sanitized = { + ...config, + options: parsedOptions.ok ? parsedOptions.options : '', + disabledReason: null + }; + if (sanitized.enabled && issues.length) { + sanitized.enabled = false; + sanitized.disabledReason = `invalid vector extension config (${issues.join(', ')})`; + warnOnce('vector-extension-invalid', `[sqlite] Vector extension disabled: ${sanitized.disabledReason}`); + } + return sanitized; +} + /** * Resolve a path relative to the repo root. 
* @param {string} repoRoot @@ -59,12 +119,13 @@ export function getPlatformKey(platform = process.platform, arch = process.arch) */ export function getVectorExtensionConfig(repoRoot, userConfig = null, overrides = {}) { const cfg = userConfig || loadUserConfig(repoRoot); + const envConfig = getEnvConfig(); const sqlite = cfg.sqlite || {}; const vectorCfg = sqlite.vectorExtension || {}; const provider = overrides.provider || vectorCfg.provider || DEFAULT_PROVIDER; const providerDefaults = PROVIDERS[provider] || {}; - const annModeRaw = overrides.annMode || vectorCfg.annMode || sqlite.annMode || 'js'; + const annModeRaw = overrides.annMode || vectorCfg.annMode || 'js'; const annMode = String(annModeRaw).toLowerCase(); const enabled = overrides.enabled === true || vectorCfg.enabled === true @@ -82,7 +143,7 @@ export function getVectorExtensionConfig(repoRoot, userConfig = null, overrides const dir = overrides.dir ? resolvePath(repoRoot, overrides.dir) : resolvePath(repoRoot, vectorCfg.dir) - || process.env.PAIROFCLEATS_EXTENSIONS_DIR + || envConfig.extensionsDir || getExtensionsDir(repoRoot, cfg); const filename = overrides.filename || vectorCfg.filename @@ -91,14 +152,14 @@ export function getVectorExtensionConfig(repoRoot, userConfig = null, overrides const pathOverride = overrides.path ? resolvePath(repoRoot, overrides.path) : resolvePath(repoRoot, vectorCfg.path) - || (process.env.PAIROFCLEATS_VECTOR_EXTENSION - ? resolvePath(repoRoot, process.env.PAIROFCLEATS_VECTOR_EXTENSION) + || (envConfig.vectorExtension + ? resolvePath(repoRoot, envConfig.vectorExtension) : null); const url = overrides.url || vectorCfg.url || providerDefaults.url || null; const downloads = overrides.downloads || vectorCfg.downloads || providerDefaults.downloads || null; - return { + return sanitizeVectorExtensionConfig({ annMode, enabled, provider, @@ -115,7 +176,7 @@ export function getVectorExtensionConfig(repoRoot, userConfig = null, overrides platform, arch, platformKey - }; + }); } /** @@ -141,7 +202,7 @@ const loadCache = new WeakMap(); */ export function loadVectorExtension(db, config, label = 'sqlite') { if (!db || !config?.enabled) { - return { ok: false, reason: 'disabled' }; + return { ok: false, reason: config?.disabledReason || 'disabled' }; } if (loadCache.has(db)) return loadCache.get(db); const extPath = resolveVectorExtensionPath(config); @@ -191,10 +252,19 @@ export function ensureVectorTable(db, config, dims) { if (!db || !config?.module || !config?.table) { return { ok: false, reason: 'missing config' }; } + if (!config.enabled) { + return { ok: false, reason: config.disabledReason || 'disabled' }; + } + if (!isSafeIdentifier(config.module) || !isSafeIdentifier(config.table)) { + return { ok: false, reason: 'invalid vector extension config' }; + } if (!Number.isFinite(dims) || dims <= 0) { return { ok: false, reason: 'invalid dims' }; } const column = config.column || DEFAULT_COLUMN; + if (!isSafeIdentifier(column)) { + return { ok: false, reason: 'invalid vector extension config' }; + } const options = config.options ? 
`, ${config.options}` : ''; try { try { @@ -238,28 +308,47 @@ export function encodeVector(vector, config) { * @returns {Array<{idx:number,sim:number}>} */ export function queryVectorAnn(db, config, embedding, topN, candidateSet) { - if (!db || !embedding) return []; + if (!db || !embedding || !config?.enabled) return []; const table = config?.table || DEFAULT_TABLE; const column = config?.column || DEFAULT_COLUMN; + if (!isSafeIdentifier(table) || !isSafeIdentifier(column)) { + warnOnce('vector-extension-unsafe', '[sqlite] Vector extension disabled: invalid identifiers'); + return []; + } const limit = Math.max(1, Number(topN) || 1); - const queryLimit = candidateSet && candidateSet.size ? limit * 5 : limit; + // Push the candidate filter into SQL only while the IN (...) list stays under SQLITE_IN_LIMIT, + // keeping the statement below SQLite's bound-parameter cap; larger sets oversample (limit * 5) + // and post-filter in JS below. + const candidateSize = candidateSet?.size || 0; + const canPushdown = candidateSize > 0 && candidateSize <= SQLITE_IN_LIMIT; + const candidates = canPushdown ? Array.from(candidateSet) : null; + const queryLimit = canPushdown ? limit : (candidateSize ? limit * 5 : limit); const encoded = encodeVector(embedding, config); if (!encoded) return []; try { + const candidateClause = canPushdown + ? ` AND rowid IN (${candidates.map(() => '?').join(',')})` + : ''; + const params = canPushdown + ? [encoded, ...candidates, queryLimit] + : [encoded, queryLimit]; + if (candidateSize && !canPushdown) { + warnOnce('vector-extension-candidates', '[sqlite] Vector extension candidate set too large; using best-effort fallback.'); + incFallback({ surface: 'search', reason: 'vector-candidates' }); + } const stmt = db.prepare( - `SELECT rowid, distance FROM ${table} WHERE ${column} MATCH ? ORDER BY distance LIMIT ?` + `SELECT rowid, distance FROM ${table} WHERE ${column} MATCH ?${candidateClause} ORDER BY distance LIMIT ?` ); - const rows = stmt.all(encoded, queryLimit); + const rows = stmt.all(...params); let hits = rows.map((row) => { const rowId = Number(row.rowid ?? row.id); const raw = row.distance ?? row.score ?? row.similarity ?? row.sim ?? 0; const sim = row.distance !== undefined ?
-raw : raw; return { idx: rowId, sim }; }); - if (candidateSet && candidateSet.size) { + if (candidateSet && candidateSet.size && !canPushdown) { hits = hits.filter((hit) => candidateSet.has(hit.idx)); } - return hits.slice(0, limit); + return hits + .sort((a, b) => (b.sim - a.sim) || (a.idx - b.idx)) + .slice(0, limit); } catch { return []; } diff --git a/tools/verify-extensions.js b/tools/verify-extensions.js index af45544a1..c29e6c710 100644 --- a/tools/verify-extensions.js +++ b/tools/verify-extensions.js @@ -1,14 +1,29 @@ #!/usr/bin/env node import fs from 'node:fs'; -import minimist from 'minimist'; +import path from 'node:path'; +import { createCli } from '../src/shared/cli.js'; import { loadUserConfig, resolveRepoRoot } from './dict-utils.js'; import { getVectorExtensionConfig, resolveVectorExtensionPath } from './vector-extension.js'; -const argv = minimist(process.argv.slice(2), { - boolean: ['json', 'load'], - string: ['provider', 'dir', 'path', 'platform', 'arch', 'module', 'table', 'column', 'encoding', 'options', 'ann-mode', 'repo'], - default: { json: false, load: true } -}); +const argv = createCli({ + scriptName: 'verify-extensions', + options: { + json: { type: 'boolean', default: false }, + load: { type: 'boolean', default: true }, + provider: { type: 'string' }, + dir: { type: 'string' }, + path: { type: 'string' }, + platform: { type: 'string' }, + arch: { type: 'string' }, + module: { type: 'string' }, + table: { type: 'string' }, + column: { type: 'string' }, + encoding: { type: 'string' }, + options: { type: 'string' }, + 'ann-mode': { type: 'string' }, + repo: { type: 'string' } + } +}).parse(); const rootArg = argv.repo ? path.resolve(argv.repo) : null; const root = rootArg || resolveRepoRoot(process.cwd()); diff --git a/tools/workers/bundle-reader.js b/tools/workers/bundle-reader.js new file mode 100644 index 000000000..f529b13f5 --- /dev/null +++ b/tools/workers/bundle-reader.js @@ -0,0 +1,12 @@ +import { readBundleFile } from '../../src/shared/bundle-io.js'; + +export default async function readBundle({ bundlePath }) { + if (!bundlePath) return { ok: false, reason: 'missing bundle path' }; + try { + const result = await readBundleFile(bundlePath); + if (!result.ok) return { ok: false, reason: result.reason || 'invalid bundle' }; + return { ok: true, bundle: { chunks: result.bundle.chunks } }; + } catch (err) { + return { ok: false, reason: err?.message || String(err) }; + } +}
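
For reviewers: a minimal sketch of driving the new tools/workers/bundle-reader.js handler directly, outside the worker pool. The fixture path and the standalone-script framing are assumptions for illustration; the `{ bundlePath }` payload and the `{ ok, bundle }` / `{ ok: false, reason }` result envelope come from the module above.

// sketch.mjs — run with `node sketch.mjs` from the repo root (ES module, top-level await).
import readBundle from './tools/workers/bundle-reader.js';

// Hypothetical fixture path; any bundle written by the indexer should work here.
const result = await readBundle({ bundlePath: './fixtures/sample-bundle.json' });
if (result.ok) {
  console.log(`bundle contains ${result.bundle.chunks.length} chunks`);
} else {
  console.error(`bundle read failed: ${result.reason}`);
}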