diff --git a/.dockerignore b/.dockerignore
index a71ba86..ce609c5 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -8,6 +8,7 @@
 # Files COPYed by docker/score/Dockerfile (anything Dockerfile needs to see).
 !registry.yaml
 !docker/score/Dockerfile
-!docker/score/anc
+!docker/score/inject
+!docker/score/inject/anc
 !docker/score/install-tools.sh
 !docker/score/score-anc100.sh
diff --git a/.github/ISSUE_TEMPLATE/00-blank.yml b/.github/ISSUE_TEMPLATE/00-blank.yml
new file mode 100644
index 0000000..8afdd98
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/00-blank.yml
@@ -0,0 +1,35 @@
+name: "Blank issue"
+description: "Open an issue that doesn't fit any of the structured templates below."
+body:
+  - type: textarea
+    id: body
+    attributes:
+      label: Issue
+      description: "Describe what's going on. Include the affected URL (e.g., anc.dev/scorecards), what you observed vs expected, and any browser / device context where relevant."
+    validations:
+      required: true
+  - type: textarea
+    id: ai-disclosure
+    attributes:
+      label: AI disclosure
+      description: "One sentence: what part of this issue was AI-written, what was human-written?"
+      placeholder: "Entirely human-written."
+    validations:
+      required: true
+  - type: textarea
+    id: agent-instructions
+    attributes:
+      label: For agents
+      description: "Agents: expand this section before filing."
+      value: |
+        <details>
+        <summary>Agent filing instructions</summary>
+
+        1. **Pick a structured template first.** Site bugs have a dedicated form — use it when it fits.
+        2. **Search first.** Run `gh search issues --repo brettdavies/agentnative-site "<keywords>"` to check for duplicates.
+        3. **AI disclosure is required.** Fill the field above honestly.
+        4. **Wrong repo?** Spec questions, principle edits, and grading findings live on [brettdavies/agentnative](https://github.com/brettdavies/agentnative). `anc` checker bugs and tool-registry submissions live on [brettdavies/agentnative-cli](https://github.com/brettdavies/agentnative-cli). Skill bundle issues live on [brettdavies/agentnative-skill](https://github.com/brettdavies/agentnative-skill).
+        5. See [CONTRIBUTING.md](https://github.com/brettdavies/agentnative-site/blob/main/CONTRIBUTING.md) for full guidelines.
+        </details>
+    validations:
+      required: false
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index 7a6c1b2..823600e 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,8 +1,11 @@
 blank_issues_enabled: false
 contact_links:
-  - name: "Spec questions, principle edits, or CLI grading"
+  - name: "Spec questions, principle edits, or grading findings"
     url: "https://github.com/brettdavies/agentnative/issues/new/choose"
-    about: "For anything about the standard itself — propose changes, grade a CLI, ask questions — file on the spec repo."
-  - name: "Checker bugs (false positives/negatives)"
+    about: "For anything about the standard itself — propose changes, submit a grading finding, ask questions — file on the spec repo."
+  - name: "Checker bugs, features, or tool-registry submissions"
     url: "https://github.com/brettdavies/agentnative-cli/issues/new/choose"
-    about: "For bugs in the `anc` checker itself, file on the tool repo."
+    about: "For bugs in the `anc` checker, feature requests, or proposing a tool for the leaderboard, file on the linter repo."
+  - name: "Skill bundle issues (bundle content, install paths, host runtimes)"
+    url: "https://github.com/brettdavies/agentnative-skill/issues/new/choose"
+    about: "For bugs or proposals about the agent-facing skill bundle, file on the skill repo."
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index fdead33..6d72a69 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -6,7 +6,7 @@
 
 <!-- CRITICAL: This section is the source of truth for CHANGELOG.md.
      generate-changelog.sh extracts these categorized bullets verbatim
-     into the release changelog. Write carefully — this IS the changelog.
+     into the release changelog. Write carefully. This IS the changelog.
 
      AUDIENCE: Users and operators. Write from their perspective.
 
@@ -20,7 +20,7 @@
 
      RULES:
 - 1-5 bullets per PR. Fewer is better. One-line fixes get one bullet.
-- Delete empty ### sections entirely — don't leave blank categories.
+- Delete empty ### sections entirely; don't leave blank categories.
 - Each bullet starts with a verb: Add, Fix, Change, Remove, Deprecate.
 - Don't duplicate the PR title — expand on it or provide context.
 - If the PR has NO user-facing changes (pure refactor, test-only, CI), leave this section empty or omit it. The PR still
diff --git a/.github/workflows/deep-check.yml b/.github/workflows/deep-check.yml
index c957d86..4f6c336 100644
--- a/.github/workflows/deep-check.yml
+++ b/.github/workflows/deep-check.yml
@@ -34,8 +34,17 @@
 # Not a PR gate — PR merge is governed by ci.yml. Failures here show up
 # in the Actions tab and (for dispatch-with-ref runs) as a commit status.
 #
-# Secrets: none required. The LHCI step uses the default GITHUB_TOKEN so
-# it can post a commit status (requires statuses:write, granted below).
+# Secrets:
+#   CLOUDFLARE_API_TOKEN / CLOUDFLARE_ACCOUNT_ID — passed to `wrangler dev`
+#     (used by both Playwright's webServer and lighthouse-ci). wrangler 4.x
+#     authenticates to the Cloudflare managed registry to read the
+#     container image manifest even under `--local`, so without these the
+#     dev server fails with "Not logged in". The same secrets that
+#     `deploy.yml` already passes — no new provisioning. Introduced after
+#     the container image moved off Docker Hub to the CF managed
+#     registry in PR #84 (U3-followup); the LHCI step uses the default
+#     GITHUB_TOKEN to post a commit status (requires statuses:write,
+#     granted below).
 
 name: deep-check
 
@@ -171,6 +180,14 @@ jobs:
         run: bun run build
 
       - name: End-to-end tests (all projects)
+        # CLOUDFLARE_API_TOKEN + CLOUDFLARE_ACCOUNT_ID let `wrangler dev`
+        # (spun up by playwright.config.ts's webServer) read the
+        # container image manifest from the CF managed registry. Without
+        # them, wrangler 4.x errors with "Not logged in" before Playwright
+        # can connect.
+        env:
+          CLOUDFLARE_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
+          CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
         run: bun run test:e2e
 
       - name: Upload Playwright artifacts on failure
@@ -218,3 +235,8 @@ jobs:
         env:
           LHCI_GITHUB_APP_TOKEN: ${{ secrets.LHCI_GITHUB_APP_TOKEN }}
           LHCI_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          # `wrangler dev` (started by lighthouserc as the local server)
+          # needs CF auth to read the container image manifest from the
+          # CF managed registry, same as the e2e job above.
+          CLOUDFLARE_API_TOKEN: ${{ secrets.CF_API_TOKEN }}
+          CLOUDFLARE_ACCOUNT_ID: ${{ secrets.CF_ACCOUNT_ID }}
diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml
index af403b1..3f2e6e3 100644
--- a/.github/workflows/deploy.yml
+++ b/.github/workflows/deploy.yml
@@ -98,6 +98,12 @@ jobs:
           accountId: ${{ secrets.CF_ACCOUNT_ID }}
           command: deploy --env staging
 
+      - name: Smoke /api/score (registry-fast-path)
+        env:
+          CF_ACCESS_CLIENT_ID: ${{ secrets.ANC_STAGING_ACCESS_CLIENT_ID }}
+          CF_ACCESS_CLIENT_SECRET: ${{ secrets.ANC_STAGING_ACCESS_CLIENT_SECRET }}
+        run: scripts/smoke-api-score.sh https://agentnative-site-staging.brettdavies.workers.dev
+
   production:
     name: build + deploy production
     if: >-
diff --git a/.gitignore b/.gitignore
index 13c646e..cdd42d9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,12 @@
 .DS_Store
 node_modules/
 .env
+# wrangler's local-secrets file (per-env variants also covered). Holds
+# secrets like TURNSTILE_SECRET for `wrangler dev --local`. Never commit
+# even when the value is a public Cloudflare test secret — keeping the
+# habit tight beats explaining the exception.
+.dev.vars
+.dev.vars.*
 .context/
 # docs/solutions/ is a symlink to ~/dev/solutions-docs (see AGENT.md).
 # Ignore that specific subpath; docs/design/ and any other future docs/
@@ -23,9 +29,8 @@ playwright-report/
 .gstack/
 .aider*
 
-# Docker batch-scorer staging area — the anc binary is built per-run from
-# the local CLI checkout (not committed); per-run logs land in out/.
-/docker/score/anc
+# Docker batch-scorer staging area — per-run logs land in out/. The
+# inject binary (docker/score/inject/anc) is gitignored separately above.
 /docker/score/out/
 
 # Vale baseline packs — downloaded by `vale sync` from the URLs in .vale.ini's
@@ -34,3 +39,7 @@ playwright-report/
 /styles/proselint/
 /styles/write-good/
 /styles/.vale-config/
+
+# Injected anc binary (docker/score/build.sh --from-source). Populated at
+# build time, never committed.
+docker/score/inject/anc
diff --git a/BRAND.md b/BRAND.md
index 5844d8f..4a4225f 100644
--- a/BRAND.md
+++ b/BRAND.md
@@ -16,7 +16,7 @@ and artifacts in its own `PRODUCT.md`.
   here is the failure mode if you don't, here is the canonical fix." The point of view is what makes the standard worth
   citing.
 - **Precise.** RFC 2119 language. Anchors stable and citable. Numbers measured, not asserted. Where a contract has a
-  canonical realization (a flag spelling, an exit code, a path), it is named explicitly.
+  canonical realization (a flag spelling, an exit code, a path), the standard names it explicitly.
 - **Inviting.** The reader (or agent handler) keeps reading by design. That comes from details: typography that rewards
   a slow read, prose that rewards a fast scan, code blocks that read like reference material a reader can trust.
   Inviting is not "friendly" and it is not "marketing." It rewards engagement.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..2e90d18
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,121 @@
+# Contributing to `agentnative-site`
+
+This is the source for [anc.dev](https://anc.dev): the rendered spec, the live leaderboard, the live-scoring loop, the
+per-tool scorecard pages, the badge surface, and the skill-distribution endpoint. Principle-level discussion belongs in
+the [spec repo](https://github.com/brettdavies/agentnative); scoring-engine work belongs in the
+[CLI repo](https://github.com/brettdavies/agentnative-cli). For visitor-facing cross-repo navigation, see
+[`anc.dev/contribute`](https://anc.dev/contribute).
+
+## Contribution tiers
+
+The site accepts three shapes of contribution. All three are welcome; none required. Site work skews toward Tier 3
+because the site is the public surface. Most improvements are concrete code or copy changes.
+
+| Tier            | Shape                                                                                                                                             | Intake                                                                                         | Effort   |
+| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | -------- |
+| **1. Signal**   | Site bug, rendering issue, broken link, copy critique, mobile-layout regression, performance regression                                           | [`site-bug`](https://github.com/brettdavies/agentnative-site/issues/new?template=site-bug.yml) | ~5 min   |
+| **2. Proposal** | A new page or section, a scorecard renderer rework, a Worker route addition, a build-pipeline change, a live-scoring surface change               | Issue with the design before opening a PR                                                      | ~1-2 hrs |
+| **3. Code**     | Site copy or design polish, scorecard renderer improvements, Worker route or header work, build-pipeline work, accessibility fixes, OG image work | PR against `dev`; `release/<YYYY-MM-DD>-<slug>` cut from `main` for deploys                    | Variable |
+
+**Scorecard submissions** (adding a tool to the leaderboard at [`/scorecards`](https://anc.dev/scorecards)) go through
+the cli repo's
+[`add-tool-to-registry`](https://github.com/brettdavies/agentnative-cli/issues/new?template=add-tool-to-registry.yml)
+template, not a PR here. The site renders what the registry contains; the registry lives in the cli repo. The spec
+repo's `grading-finding` template is a separate path for spec-feedback derived from scoring, not for registry
+submissions.
+
+**Response expectations:** Tier 1 and Tier 2 are welcome and get a substantive reply when time allows. Tier 3 PRs are
+reviewed when scope and time permit. A solo maintainer cannot promise merge windows; real PRs land.
+
+## Branch model
+
+```text
+feat/* → PR to dev (squash merge)
+       → cherry-pick to release/<YYYY-MM-DD>-<slug>
+       → PR release/* to main (squash merge)
+       → deploy.yml fires on push-to-main → Cloudflare Workers production
+```
+
+`dev` is the integration branch. `main` is what `anc.dev` serves. There are no tags or semver versions; the site deploys
+continuously via Cloudflare's `deploy.yml` on push-to-main. Engineering docs (`docs/plans/`, `docs/solutions/`,
+`docs/brainstorms/`, `docs/reviews/`) live on `dev` only and are blocked from `main` by `guard-main-docs.yml`.
+
+## Dev setup
+
+```bash
+git clone https://github.com/brettdavies/agentnative-site && cd agentnative-site
+bun install
+bun run build              # produces dist/
+bun run dev                # local dev server with hot reload
+bun x playwright test      # end-to-end suite
+```
+
+Worker dev against the staging bindings:
+
+```bash
+wrangler dev --env staging
+```
+
+The site uses Cloudflare Workers, Durable Objects (Sandbox for live scoring), R2 (score cache), and KV (kill switch +
+rate limits). The full binding inventory is in [`wrangler.jsonc`](./wrangler.jsonc).
+
+## Pre-push hook
+
+The repo ships a pre-push hook that mirrors CI plus the prose-check stages CI doesn't run. Activate once after clone:
+
+```bash
+git config core.hooksPath scripts/hooks
+```
+
+Seven stages:
+
+1. **lint** (`biome check` + `markdownlint-cli2`)
+2. **build** (`bun src/build/build.mjs`)
+3. **tests** (`bun test`, unit + regression)
+4. **wrangler dry-run** (`wrangler deploy --dry-run`, config + bundle validation)
+5. **pack-README drift** (`bun scripts/generate-pack-readme.mjs site --check`)
+6. **banned-fonts** (`bash scripts/check-banned-fonts.sh`, deployment-layer scan against `styles/site/BannedFonts.yml`)
+7. **prose-check** (`bash scripts/prose-check.sh`, Vale plus LanguageTool when reachable; skips cleanly otherwise)
+
+PRs that pass the hook locally also pass CI for stages 1-4; stages 5-7 are pre-push-only. Fix locally before pushing.
+
+## Pull requests
+
+- **Title format:** [Conventional Commits](https://www.conventionalcommits.org/) (`type(scope): description`). The PR
+  title becomes the squash-merge commit subject.
+- **Body:** follow [`.github/pull_request_template.md`](.github/pull_request_template.md). The `## Changelog` section
+  captures user-visible changes for the eventual release-PR `CHANGELOG.md` entry.
+- **Tests:** new pages ship a regression test that asserts the rendered HTML contains expected anchors and the markdown
+  twin renders. Worker routes ship unit tests under `tests/`; e2e tests live under `tests/e2e/`.
+- **Voice:** site copy passes the prose-check stack: Vale custom rule packs (brand + spec channel) plus the `/unslop`
+  floor. Run `scripts/prose-check.sh --changed-only` during authoring.
+
+## Releases
+
+Cuts are CalVer date-prefixed, slugged per change: `release/2026-05-21-show-hn-cut`, `release/2026-04-30-routing-fix`,
+etc. Cherry-pick from `dev` to the release branch, open the PR against `main`, merge via squash. `deploy.yml` fires on
+push-to-main and reaches `anc.dev` within ~2 minutes. The full procedure lives in [`RELEASES.md`](./RELEASES.md).
+
+## AI disclosure
+
+Inherits from the spec's AI disclosure policy. See
+[agentnative/CONTRIBUTING.md § AI disclosure policy](https://github.com/brettdavies/agentnative/blob/main/CONTRIBUTING.md#ai-disclosure-policy).
+
+## Security
+
+Do not file security issues in the public tracker. Use the
+[GitHub private security advisories channel](https://github.com/brettdavies/agentnative-site/security/advisories/new).
+The Sandbox container, the Worker, and the R2 cache are the primary surfaces of concern.
+
+## License
+
+See [`LICENSE`](./LICENSE).
+
+## Cross-repo navigation
+
+The full visitor-facing menu lives at [`anc.dev/contribute`](https://anc.dev/contribute). Per-repo intakes:
+
+- [Spec](https://github.com/brettdavies/agentnative): principle text, pressure-tests, versioning policy
+- [Linter](https://github.com/brettdavies/agentnative-cli): `anc`, the scoring engine, the registry
+- This repo: the site, the leaderboard renderer, the live-scoring loop
+- [Skill bundle](https://github.com/brettdavies/agentnative-skill): agent-facing bundle, install paths
diff --git a/DESIGN.md b/DESIGN.md
index 91d5649..11cead1 100644
--- a/DESIGN.md
+++ b/DESIGN.md
@@ -5,17 +5,17 @@ feedback. Supersedes the tentative stack and visual placeholders in the CEO plan
 
 Companion artifacts split by role. Shipped artifacts (those a release of the site needs to build/render) live with the
 rest of the shipped tree under `src/` and `scripts/`. Show-your-work research artifacts (palette analysis report,
-preview HTML, concept variants) live under `docs/research/design/` on the `dev` branch only — they are intentionally
-gated off `main` by `guard-main-docs.yml` so production stays free of engineering working notes. Research-only paths
-below are referenced by name; check out `dev` to read them locally.
+preview HTML, concept variants) live under `docs/research/design/` on the `dev` branch only. `guard-main-docs.yml` gates
+them off `main` so production stays free of engineering working notes. Research-only paths below are referenced by name;
+check out `dev` to read them locally.
 
 **Shipped — consumed by the site build, present on `main`:**
 
-- [`src/styles/foundation.css`](src/styles/foundation.css) — generated drop-in stylesheet. Contains palette custom
+- [`src/styles/foundation.css`](src/styles/foundation.css): generated drop-in stylesheet. Contains palette custom
   properties (light default, dark via `prefers-color-scheme`, explicit `[data-theme]` overrides), typography tokens
   (`--font-sans`, `--font-mono`, scale), and the shipped 7b inline-keyword rules. `@font-face` declarations are
-  deliberately NOT in this file (see §4.3 — they live in the site build so the stylesheet is safe to load from any
-  origin without phantom 404s). Copied byte-for-byte to `dist/css/foundation.css` by `src/build/assets.mjs`.
+  deliberately NOT in this file (see §4.3). They live in the site build so the stylesheet is safe to load from any
+  origin without phantom 404s. Copied byte-for-byte to `dist/css/foundation.css` by `src/build/assets.mjs`.
 - [`scripts/og/og.html`](scripts/og/og.html) + [`scripts/og/og.css`](scripts/og/og.css) — production source-of- truth
   for the social card. Rendered to `public/og-image.png` by `scripts/og/generate.ts` (Playwright + Sharp, ≤150 KB PNG).
 
@@ -42,7 +42,7 @@ below are referenced by name; check out `dev` to read them locally.
 
 The agentnative spec site is the first proof-of-concept for an **agent-native documentation surface**. Every decision
 here ladders up to one question: does this choice make the site more agent-legible without making it less human-legible?
-When they conflict we default to agent-legibility — the human case survives simple HTML, the agent case does not survive
+When they conflict we default to agent-legibility. The human case survives simple HTML; the agent case does not survive
 hidden state or heavy client runtimes.
 
 **Decision A (tech stack): static-site generation from markdown via a small custom build pipeline, served by a thin
@@ -55,18 +55,18 @@ requires a Worker regardless of framework. Astro without Starlight is the credib
 concrete and documented below.
 
 **Decision B (visual system): cool-neutral palette at hue 250, navy accent in the same family, deliberately-designed
-dark mode (not inverted), Pangram Pangram's Uncut Sans (body + display) paired with GitHub Next's Monaspace Xenon (code)
-— both OFL, self-hosted, chosen via the impeccable font-selection procedure to avoid the reflex-defaults (Inter, IBM
-Plex, Fraunces, etc.), code as a first-class visual element, `prefers-color-scheme` *plus* a visible user toggle, sticky
-mini-TOC on desktop.** Palette and contrast work is backed by a reproducible tool run — see
-`docs/research/design/color-analysis.md` for inputs, outputs, WCAG + APCA numbers, and every clamped value; the
+dark mode (not inverted), Pangram Pangram's Uncut Sans (body + display) paired with GitHub Next's Monaspace Xenon
+(code), both OFL, self-hosted, chosen via the impeccable font-selection procedure to avoid the reflex-defaults (Inter,
+IBM Plex, Fraunces, etc.), code as a first-class visual element, `prefers-color-scheme` *plus* a visible user toggle,
+sticky mini-TOC on desktop.** A reproducible tool run backs palette and contrast work; see
+`docs/research/design/color-analysis.md` for inputs, outputs, WCAG + APCA numbers, and every clamped value. The
 foundation stylesheet (palette + typography tokens + keyword rules) lives at `src/styles/foundation.css`. `@font-face`
-declarations live in the site build, not the foundation — see §4.3. MUST / SHOULD / MAY keywords ship option 7b (inline
-color only) — the originally-proposed 7b-plus side-stripe variant was pulled after it hit impeccable's banned-pattern
+declarations live in the site build, not the foundation (see §4.3). MUST / SHOULD / MAY keywords ship option 7b (inline
+color only). The originally-proposed 7b-plus side-stripe variant was pulled after it hit impeccable's banned-pattern
 list, with block-level alternatives (leading tag, background fill) deferred to live-site iteration. Preview at
 `docs/research/design/must-should-may-preview.html`.
 
-**JS posture.** Pragmatic. JS is fine — the total homepage budget is 2 MB. We still reject shipping a framework runtime
+**JS posture.** Pragmatic. JS is fine; the total homepage budget is 2 MB. We still reject shipping a framework runtime
 for state the site does not use, we still do syntax highlighting at build time, and we still treat every new dependency
 as an opt-in that needs a one-line justification in this file. But the bar is "does it earn its place," not "is it under
 a byte count." Use the native Clipboard API directly (which needs no library), and when a browser-quirk floor genuinely
@@ -98,7 +98,7 @@ not a person); the code blocks under-index on density (we will be code-heavier).
 how to build CLIs." Tech: Jekyll-ish static; effectively plain HTML.
 
 **github.com/brettdavies/lmgroktfy.** Vibe-calibration only. Borrow: honest product copy, minimal chrome, the "it is
-obvious what this is" feel. Avoid: the product-landing gradient and form-forward layout — this is a spec site, not a
+obvious what this is" feel. Avoid: the product-landing gradient and form-forward layout; this is a spec site, not a
 product page. One-line vibe: "single-purpose, confident." Tech: not directly knowable from the surface.
 
 ### Group 2 — other spec and standards surfaces
@@ -111,8 +111,8 @@ One-line vibe: "a standard that has aged like a standard, honestly." Tech: stati
 **rust-lang.org/book.** The code-heavy reference point. Borrow: generous code-block padding; mono that actually renders
 as mono on every platform; syntax highlighting that prefers readability over rainbow; left sidebar `<details>`-style TOC
 on desktop; the way inline `code` and block code share a visual family. Avoid: the mdBook default chrome (print button,
-in-header search, theme switcher) — we do not need them; the slightly heavy dark theme. One-line vibe: "a textbook that
-wants to be referenced." Tech: mdBook; static HTML with a small client-side search bundle.
+in-header search, theme switcher) since we do not need them; the slightly heavy dark theme. One-line vibe: "a textbook
+that wants to be referenced." Tech: mdBook; static HTML with a small client-side search bundle.
 
 **json-schema.org** and **semver.org** as a pair. Both tiny, authoritative, text-first. Borrow from semver: the
 near-absurd simplicity of a single page with an anchored list of MUST/SHOULD statements, readable in a single scroll,
@@ -135,7 +135,7 @@ Tech: static; plain HTML.
 
 ### Reference benchmark, not a candidate
 
-**Mintlify.** Commercial platform; ships the full agent-native documentation surface natively — auto `/llms.txt`,
+**Mintlify.** Commercial platform; ships the full agent-native documentation surface natively: auto `/llms.txt`,
 `/llms-full.txt`, `.md` URL suffix, `Accept: text/markdown` content negotiation, `Link` and `X-Llms-Txt` response
 headers, and a `noindex` hint on the markdown variant so search engines don't double-index. If we were publishing a
 product docs site for a for-profit tool we would seriously evaluate it. For a standard we own end-to-end and publish
@@ -153,7 +153,7 @@ edge storage via a Worker for routing. This is non-negotiable for three reasons:
 2. **Tail-risk floor.** The site stays reachable even if the build pipeline breaks. Last good commit is always a
    deployable artifact.
 3. **Thesis fit.** The `Accept: text/markdown` and `.md` suffix behavior become trivial when the markdown source **is**
-   the artifact — the Worker serves bytes, it doesn't synthesize them.
+   the artifact: the Worker serves bytes, it doesn't synthesize them.
 
 SSR is out because it adds a runtime dependency (on every request) for content that does not change between deployments.
 CSR is out because it puts the single most important thing on the site (the spec text) behind a client runtime, which
@@ -166,9 +166,9 @@ defeats the agent case entirely.
 3. **Astro without Starlight**
 4. **Hugo + Cloudflare Worker shim** for content negotiation
 5. **Eleventy + Cloudflare Worker shim**
-6. **Writing HTML by hand** (no markdown source) — listed to reject
+6. **Writing HTML by hand** (no markdown source); listed to reject
 
-Explicitly NOT a candidate: Cheng Lou's Pretext (per session spec — no image-wrapping need).
+Explicitly NOT a candidate: Cheng Lou's Pretext (per session spec; no image-wrapping need).
 
 ### 3.3 Scored table
 
@@ -214,10 +214,10 @@ content index, emit `dist/sitemap.xml` and `dist/og-image.png` (copied from `pub
 
 The Worker is ~80 lines: static assets from `env.ASSETS`, content-negotiation branch on `url.pathname.endsWith(".md")
 || Accepts(req).type(['text/html', 'text/markdown']) === 'text/markdown'` (proper RFC 7231 q-value parsing via the
-[`accepts`](https://www.npmjs.com/package/accepts) npm package — bare `includes("text/markdown")` mishandles `q=0`
-rejections, comma-separated media ranges, and the standard browser `*/*` fallback), `Link` and `X-Llms-Txt` response
-headers on every HTML response (copying the Mintlify pattern), `X-Robots-Tag: noindex` on the markdown variant so search
-engines do not double-index. Deploy is `wrangler deploy`. Rollback is `wrangler rollback`.
+[`accepts`](https://www.npmjs.com/package/accepts) npm package, because bare `includes("text/markdown")` mishandles
+`q=0` rejections, comma-separated media ranges, and the standard browser `*/*` fallback), `Link` and `X-Llms-Txt`
+response headers on every HTML response (copying the Mintlify pattern), `X-Robots-Tag: noindex` on the markdown variant
+so search engines do not double-index. Deploy is `wrangler deploy`. Rollback is `wrangler rollback`.
 
 **Static-asset binding (A12).** `env.ASSETS` is the
 [Workers Static Assets](https://developers.cloudflare.com/workers/static-assets/) binding, configured in
@@ -232,7 +232,7 @@ engines do not double-index. Deploy is `wrangler deploy`. Rollback is `wrangler
 
 **Cache strategy (P4).** HTML and `.md` responses carry `Cache-Control: public, max-age=300, s-maxage=86400,
 stale-while-revalidate=60` (short browser cache, long edge cache, SWR for the deploy-between-fetches window). Hashed
-immutable assets — fonts at `/fonts/*`, the content-hashed `/og-image.png` — carry `Cache-Control: public,
+immutable assets (fonts at `/fonts/*`, the content-hashed `/og-image.png`) carry `Cache-Control: public,
 max-age=31536000, immutable`.
 
 **404 handling (A10).** Uses the Workers Static Assets default 404 body; no custom 404 page in v0.
@@ -242,8 +242,8 @@ max-age=31536000, immutable`.
 - **Nine pages.** Everything a framework provides by default (sidebar nav, search indexing, component library,
   multi-version switcher) is either unused or faintly in the way.
 - **Content-negotiation semantics stay in one file we own.** Even with Starlight's plugin ecosystem, `Accept:
-  text/markdown` on the same URL is not a plugin — it is a Worker concern. If the Worker exists regardless, the
-  framework's incremental value for this site is reduced to "it generates the HTML shell" — which we can do in ~40 lines
+  text/markdown` on the same URL is not a plugin; it is a Worker concern. If the Worker exists regardless, the
+  framework's incremental value for this site collapses to "it generates the HTML shell," which we can do in ~40 lines
   of templating.
 - **Plugin supply chain.** With Astro + Starlight + three agent-native plugins, we depend on four moving parts
   maintained by different humans on different release cadences. Each Starlight major-version upgrade risks plugin lag.
@@ -253,7 +253,7 @@ max-age=31536000, immutable`.
 - **Upgrade cost over a decade.** HTML does not churn. CommonMark and Shiki are reasonable to pin and replace.
 
 **Cost honestly stated.** We write ~200 lines of build script. We write ~80 lines of Worker. We do not get Starlight's
-prebuilt components free — specifically, we hand-wire click-to-copy (native Clipboard API, ~40 lines), anchor-copy
+prebuilt components free. Specifically, we hand-wire click-to-copy (native Clipboard API, ~40 lines), anchor-copy
 buttons (~30 lines), and CSS-only tabbed code (radio/`:checked`, ~60 lines CSS, no JS). The theme toggle (§4.9) is ~40
 lines of JS + CSS. Total hand-rolled surface ≈ 450 lines across HTML template + Worker + JS + CSS, plus the build
 script. Auditable in one sitting.
@@ -267,7 +267,7 @@ markdownlint-cli2  →  bun run build  →  bun test  →  bun x playwright test
 
 Each stage gates the next. `bun test` includes the three CRITICAL regression snapshots (anchor-slug, `llms.txt` shape,
 markdown byte-equivalence) from the eng review. Lighthouse CI asserts the 800 KB page-payload regression gate on `/p1`
-(§4.8.1) — merge-blocking, scoped to catch runaway growth, not enforce an absolute target. `wrangler deploy --dry-run`
+(§4.8.1); merge-blocking, scoped to catch runaway growth, not enforce an absolute target. `wrangler deploy --dry-run`
 validates the Worker + assets bundle without publishing. A separate `.github/workflows/deploy.yml` runs on push-to-main
 and performs the real `wrangler deploy` via `cloudflare/wrangler-action@v3`.
 
@@ -296,7 +296,7 @@ content/
 
 Filename convention for principles is load-bearing: `p<n>-<slug>.md` where `<slug>` matches the locked anchor IDs in
 §3.5. The build derives per-principle URLs (`/p<n>`), anchor slugs (`#p<n>-<slug>`), and the order of concatenation from
-the filename — no separate manifest file, no frontmatter ordering key.
+the filename. No separate manifest file, no frontmatter ordering key.
 
 **Build-step outputs (`dist/`):**
 
@@ -318,7 +318,7 @@ the filename — no separate manifest file, no frontmatter ordering key.
 | `src/styles/foundation.css` | `dist/css/foundation.css`                   | Byte-for-byte copy (C3). `cmp -s src/styles/foundation.css dist/css/foundation.css` passes. Source of truth for palette + type tokens + RFC-keyword color rules.                                                                        |
 | templated `site.css`        | `dist/css/site.css`                         | Additive-only: `@font-face` (Uncut Sans + Monaspace Xenon), layout (grid, measure, mini-TOC sticky), code-block chrome, theme-toggle widget, copy-button widget, Shiki dual-theme CSS bridge (§4.6). No overrides of foundation tokens. |
 | `public/fonts/*.woff2`      | `dist/fonts/*.woff2`                        | Self-hosted variable woff2 files per §4.3. Served under `/fonts/`, not `/assets/fonts/`. `@font-face` in `site.css` references `/fonts/…`.                                                                                              |
-| `src/client/theme-init.ts`  | inline `<script>` in `<head>` of every HTML | Compiled + minified by `bun build`, then INLINED (not `<script src>`) so the `[data-theme]` attribute is on `<html>` before first paint — no dark-mode flash.                                                                           |
+| `src/client/theme-init.ts`  | inline `<script>` in `<head>` of every HTML | Compiled + minified by `bun build`, then INLINED (not `<script src>`) so the `[data-theme]` attribute is on `<html>` before first paint, preventing dark-mode flash.                                                                    |
 | `src/client/theme.ts`       | `dist/js/theme.js`                          | Loaded via `<script defer>` in `<body>` close. Handles toggle clicks, `localStorage` writes, `matchMedia` change events.                                                                                                                |
 | `src/client/clipboard.ts`   | `dist/js/clipboard.js`                      | Loaded via `<script defer>`. Click-to-copy on every `<pre>` + copy-anchor on every heading. Uses `navigator.clipboard.writeText` with the pre-2022 Safari fallback from §4.8.                                                           |
 
@@ -329,25 +329,25 @@ The HTML shell emits exactly two `<link rel="stylesheet">` tags in `<head>`, in
 <link rel="stylesheet" href="/css/site.css">
 ```
 
-Foundation first, site second — cascade order is load-bearing. `site.css` additive rules override nothing in
+Foundation first, site second; cascade order is load-bearing. `site.css` additive rules override nothing in
 `foundation.css`; it supplies what foundation deliberately omits (see §4.3 `@font-face` rationale).
 
 **Asset resolution (A4).** Under `html_handling: "auto-trailing-slash"` + flat layout (all nine HTML files at `dist/`
 root alongside their `.md` twins):
 
-| Request path | Resolution                                                      |
-| ------------ | --------------------------------------------------------------- |
-| `/p3`        | 200 → serves `dist/p3.html`                                     |
-| `/check`     | 200 → serves `dist/check.html`                                  |
-| `/about`     | 200 → serves `dist/about.html`                                  |
-| `/check/`    | 307 → redirects to `/check`                                     |
-| `/p3.html`   | 307 → redirects to `/p3`                                        |
-| `/p3.md`     | 200 → serves `dist/p3.md` (no html_handling applies — not HTML) |
+| Request path | Resolution                                                     |
+| ------------ | -------------------------------------------------------------- |
+| `/p3`        | 200 → serves `dist/p3.html`                                    |
+| `/check`     | 200 → serves `dist/check.html`                                 |
+| `/about`     | 200 → serves `dist/about.html`                                 |
+| `/check/`    | 307 → redirects to `/check`                                    |
+| `/p3.html`   | 307 → redirects to `/p3`                                       |
+| `/p3.md`     | 200 → serves `dist/p3.md` (no html_handling applies; not HTML) |
 
 Canonical URLs are extension-less, no trailing slash, uniform across all nine pages (`/`, `/p1`…`/p7`, `/check`,
 `/about`). No `check/index.html` / `about/index.html` asymmetry.
 
-**`llms-full.txt` per-section format (A5).** Each section — `_intro`, p1…p7, `check`, `about` — is emitted in this exact
+**`llms-full.txt` per-section format (A5).** Each section (`_intro`, p1…p7, `check`, `about`) is emitted in this exact
 shape, concatenated in source order, separated by a bare `---` line:
 
 ```text
@@ -371,13 +371,13 @@ or the markdown twin.
 - **Order is by filename.** `p1-...md` ships first, `p7-...md` last. Files sort lex-numerically. Introducing `p8-...` in
   a future revision extends the list without any manifest edit.
 - **Markdown source round-trip is byte-equivalent.** The `/p3.md` endpoint serves exactly the bytes of
-  `content/principles/p3-progressive-help-discovery.md` — no re-rendering, no re-wrapping. The HTML path at `/p3` and
-  `/` is a derivative. If the site render ever drifts from the source (e.g., adds presentational markup the source does
-  not have), the markdown channel wins.
+  `content/principles/p3-progressive-help-discovery.md`: no re-rendering, no re-wrapping. The HTML path at `/p3` and `/`
+  is a derivative. If the site render ever drifts from the source (e.g., adds presentational markup the source does not
+  have), the markdown channel wins.
 - **`content/_intro.md` is the only hand-authored framing text.** Everything else is either principle content or
   sub-page content. No site-render concerns leak into `_intro.md` (no chrome, no TOC fragments, no header duplicates).
 - **Shiki + remark plugins** (the inline-keyword pass from §4.7) run during HTML render only. The source `.md` stays
-  untouched — the `/p3.md` response has `MUST` / `SHOULD` / `MAY` as plain uppercase, no class wrappers.
+  untouched: the `/p3.md` response has `MUST` / `SHOULD` / `MAY` as plain uppercase, no class wrappers.
 
 A ~200-line Node build script implements this. Pseudocode sketch (not the final code, lives in `scripts/build/` once the
 implementation session starts):
@@ -414,7 +414,7 @@ text/markdown`, and `/llms-full.txt`. `llms.txt` + `llms-full.txt` at site root
 `#p2-structured-parseable-output`, `#p3-progressive-help-discovery`, `#p4-fail-fast-actionable-errors`,
 `#p5-safe-retries-mutation-boundaries`, `#p6-composable-predictable-command-structure`,
 `#p7-bounded-high-signal-responses`. No tier keyword (MUST/SHOULD/MAY) in the slug. Once shipped these are permanent
-citation primitives — any rename breaks every inbound link, blog quote, HN comment, or agent citation. If a principle is
+citation primitives. Any rename breaks every inbound link, blog quote, HN comment, or agent citation. If a principle is
 merged or split in the upstream `principles/` source, follow the propagation protocol in `principles/AGENTS.md` and
 treat the old slug as a permanent 301 target. Version and date in footer. Deploy on Cloudflare Workers with Static
 Assets. SSG hard. Mobile-first. A11y baseline: skip-link, semantic landmarks, `prefers-reduced-motion`,
@@ -428,7 +428,7 @@ Scores 44/50 and is the credible fallback. Gives us a tested markdown pipeline,
 Shiki integration via `@astrojs/markdown-remark`, and the Astro CLI/dev-server ergonomics a hand-rolled build lacks. It
 loses to plain+Worker on thesis fit (we still ship a framework) and on CN story parity (Astro emits `/p1/index.html`; we
 either adopt a slightly non-conventional URL shape, configure output to flatten to `/p1.html`, or implement the `.md`
-suffix in the Worker the same way as plain-HTML — in which case we kept the Worker work and paid for Astro anyway). Good
+suffix in the Worker the same way as plain-HTML, in which case we kept the Worker work and paid for Astro anyway). Good
 choice if Brett wants the dev-server comfort and the community-maintained markdown pipeline.
 
 ### 3.7 Flip conditions (what would change the recommendation)
@@ -454,12 +454,12 @@ documentation surface pattern at
 ### 3.9 Skill distribution — `/skill` and `/skill.json`
 
 Two surfaces, one source. Agents fetch `/skill.json` (canonical, machine-primary). Humans fetch `/skill` (HTML render,
-identical commands). Both derive from `src/data/skill.json` at build time — drift is structurally impossible because
+identical commands). Both derive from `src/data/skill.json` at build time, so drift is structurally impossible because
 there's only one source.
 
 **Architecture: agent-primary.** The JSON is the contract; the HTML is a templated render. v1 ships singular `/skill`
 for the single advertised skill (`agent-native-cli`); per-skill `/skill/<name>` URLs remain deferred until N>1. When a
-second skill ships, `/skill` becomes an index and per-skill content moves under `/skill/<name>` — the Worker's
+second skill ships, `/skill` becomes an index and per-skill content moves under `/skill/<name>`. The Worker's
 JSON-extension dispatch is already shape-agnostic, so no Worker code change is anticipated for that transition.
 
 **Source repo coupling.** This site vendors the skill manifest's per-host install commands and metadata at site build
@@ -495,7 +495,7 @@ for the repo-name asymmetry: the skill repo is named `agentnative-skill` but the
 `agent-native-cli`; a bare `git clone` lands on the repo name and breaks every host's skill-discovery convention.
 
 **Header contract (`src/worker/headers.ts`).** The Worker's HTML/markdown branches are joined by a JSON-extension branch
-detected by URL ending in `.json` (extension, not prefix — any `/<slug>.json` endpoint reuses the branch, so the v2
+detected by URL ending in `.json` (extension, not prefix; any `/<slug>.json` endpoint reuses the branch, so the v2
 per-skill `/skill/<name>.json` lands on the same code path with no Worker change):
 
 ```text
@@ -505,7 +505,7 @@ Access-Control-Allow-Origin: *
 X-Robots-Tag:                noindex
 ```
 
-No `Link: rel="alternate"` and no `X-Llms-Txt` on JSON paths — there's no markdown twin for `.json`. The `Accept:
+No `Link: rel="alternate"` and no `X-Llms-Txt` on JSON paths, because there's no markdown twin for `.json`. The `Accept:
 text/markdown` content-negotiation rewrite in `src/worker/index.ts` short-circuits on `.json` paths so `Accept:
 text/markdown` against `/skill.json` returns the JSON unchanged rather than 404'ing on a non-existent `/skill.json.md`
 twin.
@@ -527,16 +527,16 @@ against the Cloudflare cache-purge API after deploy). Update detection at instal
 ### 3.10 CLI install — `/install`
 
 Different surface, same dispatch mechanism. `/install` is the human-facing install page for the `agentnative` CLI tool
-itself — `brew install brettdavies/tap/agentnative`, `cargo install agentnative`, platform archives. Two
-representations: HTML (`/install`) and the markdown twin (`/install.md` or `Accept: text/markdown` against `/install`).
-**No JSON manifest.** The CLI is human-installable via the package managers documented on the page; an agent-friendly
-install manifest is not in scope today, and `/install.json` returns 404 by simply not emitting the asset.
+itself: `brew install brettdavies/tap/agentnative`, `cargo install agentnative`, platform archives. Two representations:
+HTML (`/install`) and the markdown twin (`/install.md` or `Accept: text/markdown` against `/install`). **No JSON
+manifest.** The CLI is human-installable via the package managers documented on the page; an agent-friendly install
+manifest is not in scope today, and `/install.json` returns 404 by not emitting the asset.
 
 The page is content-driven from `content/install.md` and runs through the same `subPages` pipeline as `/methodology` and
-`/scorecard-schema` — no dedicated emitter. The 404 contract for `/install.json` is upheld by asset absence:
-Cloudflare's `not_found_handling: "404-page"` (`wrangler.jsonc`) returns the standard 404 page for any path that has no
-asset. A build-time regression test (`tests/regression.test.ts` regression #6) asserts `dist/install.json` is not
-emitted, so a future edit can't accidentally re-create it.
+`/scorecard-schema`; no dedicated emitter. The 404 contract for `/install.json` is upheld by asset absence: Cloudflare's
+`not_found_handling: "404-page"` (`wrangler.jsonc`) returns the standard 404 page for any path that has no asset. A
+build-time regression test (`tests/regression.test.ts` regression #6) asserts `dist/install.json` is not emitted, so a
+future edit can't accidentally re-create it.
 
 `/install` is the canonical home for the brew/cargo install lines. Three places that previously inlined those commands
 now link here instead:
@@ -558,11 +558,11 @@ One direction. The CEO plan's stated preference ("simple and traditional with mo
 Cool-neutral base, hue 250, one accent in the same hue family, three semantic warm-or-cool accents for MUST / SHOULD /
 MAY. The choice of cool over warm is load-bearing for spec adoption: research summarized below lands decisively on cool
 neutrals for technical documentation. The full ramps, WCAG ratios, APCA Lc values, and gamut-clamping record live in
-`docs/research/design/color-analysis.md` — all generated by `scripts/design/generate-palette.mjs` using `culori` and
+`docs/research/design/color-analysis.md`, all generated by `scripts/design/generate-palette.mjs` using `culori` and
 `apca-w3`.
 
-**Why cool, not warm — color psychology for spec adoption.** Synthesized from 2026 industry sources (see sources
-appendix at end of file). Three findings drive the call:
+**Why cool, not warm: color psychology for spec adoption.** Synthesized from 2026 industry sources (see sources appendix
+at end of file). Three findings drive the call:
 
 1. **Cool neutrals read as credible and logical in developer-facing contexts.** Blue-tinged grays and cool off-whites
    are the consistent pattern across enterprise dev tools, design systems (Material, Fluent, Carbon), and long-form
@@ -580,26 +580,26 @@ courting developer adoption.
 
 **Emitted token summary** (full table with OKLCH, hex, and contrast in `docs/research/design/color-analysis.md`):
 
-| Role             | Light (hex) | Dark (hex) | Notes                                                                                                                                          |
-| ---------------- | ----------- | ---------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
-| `--bg`           | `#fafbfd`   | `#060a0e`  | Page background.                                                                                                                               |
-| `--bg-code`      | `#f0f4f7`   | `#0d1218`  | Inline + block code background.                                                                                                                |
-| `--border`       | `#cfd5db`   | `#222a32`  | Hairline dividers, code-block border.                                                                                                          |
-| `--fg-muted`     | `#7d848a`   | `#8d949c`  | Decorative/large-only captions (≥1.125rem). Below 4.5:1 small-text contrast by design — use `--fg-secondary` for anything readable under 18px. |
-| `--fg-secondary` | `#6a7278`   | `#a3a9af`  | Readable secondary text: site tagline, eyebrow labels, footer meta, captions under 18px. Passes WCAG AA 4.5:1.                                 |
-| `--fg-body`      | `#1a2026`   | `#dfded8`  | Body prose. Warm off-white in dark mode.                                                                                                       |
-| `--fg-heading`   | `#070c11`   | `#f3f2ed`  | Headings.                                                                                                                                      |
-| `--accent`       | `#0058aa`   | `#6dbdff`  | Links, focus ring, copy-button hover.                                                                                                          |
-| `--must`         | `#af2b25`   | `#ff9c8d`  | RFC keyword: MUST.                                                                                                                             |
-| `--should`       | `#a16100`   | `#f6b669`  | RFC keyword: SHOULD.                                                                                                                           |
-| `--may`          | `#007980`   | `#64d1d7`  | RFC keyword: MAY.                                                                                                                              |
+| Role             | Light (hex) | Dark (hex) | Notes                                                                                                                                         |
+| ---------------- | ----------- | ---------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
+| `--bg`           | `#fafbfd`   | `#060a0e`  | Page background.                                                                                                                              |
+| `--bg-code`      | `#f0f4f7`   | `#0d1218`  | Inline + block code background.                                                                                                               |
+| `--border`       | `#cfd5db`   | `#222a32`  | Hairline dividers, code-block border.                                                                                                         |
+| `--fg-muted`     | `#7d848a`   | `#8d949c`  | Decorative/large-only captions (≥1.125rem). Below 4.5:1 small-text contrast by design; use `--fg-secondary` for anything readable under 18px. |
+| `--fg-secondary` | `#6a7278`   | `#a3a9af`  | Readable secondary text: site tagline, eyebrow labels, footer meta, captions under 18px. Passes WCAG AA 4.5:1.                                |
+| `--fg-body`      | `#1a2026`   | `#dfded8`  | Body prose. Warm off-white in dark mode.                                                                                                      |
+| `--fg-heading`   | `#070c11`   | `#f3f2ed`  | Headings.                                                                                                                                     |
+| `--accent`       | `#0058aa`   | `#6dbdff`  | Links, focus ring, copy-button hover.                                                                                                         |
+| `--must`         | `#af2b25`   | `#ff9c8d`  | RFC keyword: MUST.                                                                                                                            |
+| `--should`       | `#a16100`   | `#f6b669`  | RFC keyword: SHOULD.                                                                                                                          |
+| `--may`          | `#007980`   | `#64d1d7`  | RFC keyword: MAY.                                                                                                                             |
 
 All body pairs (`--fg-body`, `--fg-secondary`, `--fg-heading`, `--accent`) pass WCAG AA (≥4.5:1) **and** APCA body
 minimum (|Lc| ≥ 60) in both modes. Headings exceed AAA. `--fg-muted` deliberately sits below AA 4.5:1 in both modes
-(≈4.3:1) — it is a decorative caption tier reserved for text ≥1.125rem where the AA large-text threshold (3:1) applies;
-use `--fg-secondary` for any small body-size secondary text. Two dark-mode tokens (`must`, `accent-subtle`) required a
-tuning pass after the first APCA run flagged them below the 60 threshold; the tuning is recorded in the script as a
-comment, and the second-pass contrast table in the report shows all body pairs clearing thresholds.
+(≈4.3:1) because it is a decorative caption tier reserved for text ≥1.125rem where the AA large-text threshold (3:1)
+applies; use `--fg-secondary` for any small body-size secondary text. Two dark-mode tokens (`must`, `accent-subtle`)
+required a tuning pass after the first APCA run flagged them below the 60 threshold; the tuning is recorded in the
+script as a comment, and the second-pass contrast table in the report shows all body pairs clearing thresholds.
 
 ### 4.2 Dark mode is deliberately designed (not inverted)
 
@@ -607,7 +607,7 @@ Per user direction ("inverted neutrals are acceptable only if they are what woul
 palette was designed independently, not derived by flipping lightness. Four deliberate deviations from a pure inversion:
 
 1. **Background at L=14, not L=0.** Near-black, not pitch-black. Pitch-black produces halation around body text on LCDs.
-   L=14 is approximately GitHub-dark and VS Code-dark — the level the industry has converged on.
+   L=14 is approximately GitHub-dark and VS Code-dark, the level the industry has converged on.
 2. **Mid-range chroma slightly higher (up to 0.02) than light-mode mid-range.** Low-chroma grays on dark backgrounds
    read dead; a hint of hue keeps the UI from feeling Kindle-adjacent.
 3. **Text top tones warm-shift to hue 95** (warm off-white), not hue 250 (cool near-white). Cool text on a cool dark
@@ -623,12 +623,12 @@ reviewer sees the *why* alongside the numbers.
 
 **Ship Pangram Pangram's [Uncut Sans](https://fontshare.com/fonts/uncut-sans) for body + display, and GitHub Next's
 [Monaspace Xenon](https://monaspace.githubnext.com/) for code.** Both OFL. Chosen via the full font-selection procedure
-in [impeccable's typography reference](.claude/skills/impeccable/reference/typography.md) — not from the training-data
-defaults (Inter, IBM Plex, Fraunces, Space Grotesk, Instrument Serif, all of which impeccable ships a
-reflex-fonts-to-reject list for). See session notes in [`PRODUCT.md`](PRODUCT.md) for the 3-word brand voice
-("opinionated, precise, inviting") that drove the pick.
+in [impeccable's typography reference](.claude/skills/impeccable/reference/typography.md), not from the training-data
+defaults (Inter, IBM Plex, Fraunces, Space Grotesk, Instrument Serif, which impeccable ships a reflex-fonts-to-reject
+list for). See session notes in [`PRODUCT.md`](PRODUCT.md) for the 3-word brand voice ("opinionated, precise, inviting")
+that drove the pick.
 
-**Stacks emitted in [`src/styles/foundation.css`](src/styles/foundation.css)** — reproduced here for review; do not
+**Stacks emitted in [`src/styles/foundation.css`](src/styles/foundation.css)**, reproduced here for review; do not
 hand-edit the CSS, change the generator:
 
 ```css
@@ -645,14 +645,14 @@ hand-edit the CSS, change the generator:
 ```
 
 Ligatures and contextual alternates are OFF in mono so spec operators (`>=`, `!=`, `->`, `|>`, `->|`) render with
-explicit character shapes — critical for a document whose correctness depends on the reader seeing exactly what is
+explicit character shapes, critical for a document whose correctness depends on the reader seeing exactly what is
 written. Body ligatures stay on because common ligatures (fi, fl, ffi) improve Latin prose readability with no operator
 risk.
 
 **Production loading.** The `@font-face` declarations live in the **site build's** CSS, not in `foundation.css`. Keeping
 them out of `foundation.css` means the generated stylesheet is safe to load from any origin without phantom 404s against
-missing `/fonts/` paths — relevant for design previews, demos, and any consumer that loads `foundation.css` without the
-site around it. At site-build time, emit the following (into the site's `site.css` or inlined in the HTML shell):
+missing `/fonts/` paths, which matters for design previews, demos, and any consumer that loads `foundation.css` without
+the surrounding site. At site-build time, emit the following (into the site's `site.css` or inlined in the HTML shell):
 
 ```css
 @font-face {
@@ -672,14 +672,14 @@ site around it. At site-build time, emit the following (into the site's `site.cs
 ```
 
 Also emit `<link rel="preload" as="font" crossorigin>` for both files in the HTML `<head>` so the swap happens on first
-paint rather than mid-render. Total shipped: ~35–50 KB gz per family (Latin subset, variable axis) — well inside the 1–2
+paint rather than mid-render. Total shipped: ~35–50 KB gz per family (Latin subset, variable axis), well inside the 1–2
 MB page-payload ceiling.
 
-**Metric-matched fallbacks — TO CALIBRATE AT IMPLEMENTATION.** The `@font-face` block above deliberately omits
+**Metric-matched fallbacks (TO CALIBRATE AT IMPLEMENTATION).** The `@font-face` block above deliberately omits
 `ascent-override` / `descent-override` / `size-adjust`. These reduce layout shift during the font-display swap but
 require real metric measurement. Before ship: run [Fontaine](https://github.com/unjs/fontaine) (or read tables directly
 with `fontkit`) against the shipped woff2 files, compute the overrides, and commit them into the site build's
-`@font-face` block. Do not guess — wrong overrides cause visible shift, worse than the default.
+`@font-face` block. Do not guess; wrong overrides cause visible shift, worse than the default.
 
 **Preview behavior.** `docs/research/design/must-should-may-preview.html` loads both families via CDN `<link>` tags so
 nothing needs to be self-hosted for a design review:
@@ -694,13 +694,13 @@ nothing needs to be self-hosted for a design review:
 
 Both CDN packages register their `@font-face` rules under the exact family names `foundation.css` references, so the
 preview needs no `--font-*` overrides. If either CDN is blocked (offline demo, strict network), the preview falls back
-through the `--font-sans` / `--font-mono` fallback stacks to system fonts. Production does NOT rely on either CDN — it
+through the `--font-sans` / `--font-mono` fallback stacks to system fonts. Production does NOT rely on either CDN; it
 self-hosts both woff2 files from `/fonts/` using the variable font from GitHub Next's repo directly (checked in to the
 site's own assets), per the `@font-face` block documented above.
 
 #### Font supply chain (A11, C4)
 
-Variable woff2 files are checked in to `public/fonts/` and served from `dist/fonts/` — no runtime CDN dependency. Vendor
+Variable woff2 files are checked in to `public/fonts/` and served from `dist/fonts/`: no runtime CDN dependency. Vendor
 sources and integrity hashes are pinned and verified by a one-shot script.
 
 | Family              | Source                                                                      | License | Checked-in path                               |
@@ -714,17 +714,18 @@ already-checked-in files re-runs the hash check without re-downloading. CI runs
 hashes fails the build. Hash bumps are deliberate: refresh the files, regenerate `hashes.txt`, commit both together.
 
 **Preview vs production (C4).** `docs/research/design/must-should-may-preview.html` loads fonts via CDN (see "Preview
-behavior" above) — fast, zero-local-asset design review. Production self-hosts from `/fonts/` with the checked-in woff2
-files. The two surfaces are NOT guaranteed byte-identical across versions: Fontshare can push a new Uncut Sans revision
-at any time, and the checked-in production file is pinned by hash. Final visual signoff before the HN launch MUST run
-against the production-hosted fonts (via `bun run dev` against the real `dist/` or the staging workers.dev URL), not the
-CDN preview, because metric drift between vendor revisions can shift leading and break the §4.4 type-scale calibration.
+behavior" above) for fast, zero-local-asset design review. Production self-hosts from `/fonts/` with the checked-in
+woff2 files. The two surfaces are NOT guaranteed byte-identical across versions: Fontshare can push a new Uncut Sans
+revision at any time, and the checked-in production file is pinned by hash. Final visual signoff before the HN launch
+MUST run against the production-hosted fonts (via `bun run dev` against the real `dist/` or the staging workers.dev
+URL), not the CDN preview, because metric drift between vendor revisions can shift leading and break the §4.4 type-scale
+calibration.
 
 ### 4.4 Type scale
 
 Modular scale, 1.25 ratio (major third), fluid body via `clamp()` from 17px at 360px viewport up to 18px at ~1100px. H1
 also clamps. H2–H4 stay fixed (impeccable's guidance: "Fixed `rem` scales for app UIs, fluid for marketing/content page
-headings" — this site is content-page, so body + h1 flex, inner headings do not).
+headings"; this site is content-page, so body + h1 flex, inner headings do not).
 
 All values emitted as tokens in `foundation.css`:
 
@@ -771,7 +772,7 @@ Section gap: `--space-7`. Code block margin-block: `--space-5`.
 
 Most visually load-bearing element. All code authored in markdown with language tags. Highlighted at **build time** via
 Shiki using a dual-theme configuration (`github-light` + `github-dark-dimmed`). One rendered `<pre>` serves both modes
-via inline CSS custom properties — no client-side theme JS, no FOUC on mode switch.
+via inline CSS custom properties: no client-side theme JS, no FOUC on mode switch.
 
 Treatment:
 
@@ -802,7 +803,7 @@ Shiki config (passed to `codeToHtml` / `@shikijs/rehype` in the build pipeline):
 }
 ```
 
-CSS bridge (lives in `site.css`, not `foundation.css` — it depends on Shiki-emitted class shape):
+CSS bridge (lives in `site.css`, not `foundation.css`, because it depends on Shiki-emitted class shape):
 
 ```css
 /* Default: light. Shiki's emitted `color:` wins. */
@@ -830,8 +831,8 @@ preference of dark does not override an explicit user choice of light (mirrors t
 
 ### 4.7 RFC-keyword treatment — ship 7b (inline), defer block
 
-**Ships: option 7b (inline keyword color only).** The block-level callout variant — originally spec'd as 7b-plus with a
-3px left-edge accent stripe — was pulled after impeccable's `<absolute_bans>` rule flagged `border-left > 1px` on
+**Ships: option 7b (inline keyword color only).** The block-level callout variant (originally spec'd as 7b-plus with a
+3px left-edge accent stripe) was pulled after impeccable's `<absolute_bans>` rule flagged `border-left > 1px` on
 callouts as the #1 most-overused AI-slop pattern. The ban applies regardless of semantic color, radius, opacity, or
 variable-name intent. Even a semantic MUST/SHOULD/MAY stripe is banned. The only non-controversial keyword treatment
 that survives the ban is the inline color on the word itself.
@@ -846,7 +847,7 @@ The shipped CSS (three rules, emitted by `foundation.css`):
 
 Preview at `docs/research/design/must-should-may-preview.html` (dev branch only) shows 7a (plain bold, baseline) vs 7b
 (inline color) side by side in both color modes. Contrast validated against APCA body minimum (|Lc| ≥
-60) in both modes — see `docs/research/design/color-analysis.md`.
+60) in both modes; see `docs/research/design/color-analysis.md`.
 
 **How the build applies the markup.** A small remark plugin runs a single inline pass at render time. It replaces
 bare-word occurrences of `MUST` / `MUST NOT` / `SHOULD` / `SHOULD NOT` / `MAY` in prose text nodes with `<strong
@@ -875,7 +876,7 @@ Explicit test cases (wired into `tests/build.test.ts`):
 | `` Call `MUST` explicitly. ``             | No match (inlineCode ancestor disqualifies).                                           |
 | `[MUST](https://example.com/must)`        | No match (link ancestor disqualifies; link body + href both skipped).                  |
 | `## MUST support --json` (heading)        | Match (heading is not an excluded ancestor).                                           |
-| `**MUST:** Use try_parse() …` (list item) | Annotate-parent case — see A6 below; no second `<strong>` wrap.                        |
+| `**MUST:** Use try_parse() …` (list item) | Annotate-parent case (see A6 below); no second `<strong>` wrap.                        |
 
 **Nested-strong handling (A6).** In the principle source files the tier markers are authored as `**MUST:**`,
 `**SHOULD:**`, `**MAY:**` at the start of requirement list items. After mdast parsing those render as `<strong>MUST:
@@ -893,17 +894,17 @@ parent is `listItem`/`paragraph`/etc.) uses the default wrap-in-new-`<strong>` b
 #### Deferred: block-level treatment (decide once the site is live)
 
 The inline keyword color handles mid-sentence references ("Tools MUST detect invalid state early") well. The *tiered
-information architecture* of the principles — `**MUST:**` / `**SHOULD:**` / `**MAY:**` section headers followed by
-bullet lists — will probably want additional visual chunking so a scroll-speed reader can see which tier a given bullet
-belongs to. The ban forecloses the side-stripe; two post-ban candidates remain, both to be evaluated against real
-principle content after the site is rendering:
+information architecture* of the principles (`**MUST:**` / `**SHOULD:**` / `**MAY:**` section headers followed by bullet
+lists) wants extra visual chunking so a scroll-speed reader can see which tier a given bullet belongs to. The ban
+forecloses the side-stripe; two post-ban candidates remain, both to be evaluated against real principle content after
+the site is rendering:
 
 1. **Leading RFC tag.** Render each requirement-list item with a colored, bold keyword tag as a left prefix: `MUST Use
    try_parse() instead of parse().` The tag carries the color; no border, no background fill. Reads like a rendered RFC
    draft. Implementation: remark plugin's second pass inserts `<span class="rfc-tag rfc-must">MUST</span>` before the
    `<li>` text; a small CSS rule sets fixed-width inline-block. Cost: ~40 lines of plugin + ~8 lines of CSS.
 2. **Full background tint.** Wrap each requirement-list paragraph with `class="callout must"` (or `should` / `may`);
-   foundation.css adds `.callout.* { background: var(--must-wash); padding: ... }` — flat fill, no border. Reads as a
+   foundation.css adds `.callout.* { background: var(--must-wash); padding: ... }`: flat fill, no border. Reads as a
    tinted panel, more visual weight than the leading tag. Cost: ~20 lines of plugin + ~6 lines of CSS + three wash
    tokens (the generator knows how to produce them; currently omitted from `foundation.css`). See the generator comment
    where `light["must-wash"]` used to live.
@@ -964,27 +965,27 @@ The site degrades cleanly when JS is disabled or fails to load:
 ```
 
 The `.js` class is set by the inline script (same one that also reads `localStorage` and sets `[data-theme]`) before
-first paint. CSS-only widgets — the tabbed multi-language code blocks (hidden radio + `:checked ~ .panel`), `:target`
-highlighting, `prefers-color-scheme` dark mode — continue to work with zero JS. Prose, code blocks, anchors, and
+first paint. CSS-only widgets (the tabbed multi-language code blocks via hidden radio + `:checked ~ .panel`, `:target`
+highlighting, `prefers-color-scheme` dark mode) continue to work with zero JS. Prose, code blocks, anchors, and
 navigation are fully functional without the deferred scripts.
 
 ### 4.8.1 Performance budget (C1)
 
 Budgets are regression gates, not absolute targets. A Cloudflare edge POP serves each asset in parallel from the nearest
 region; the Monaspace woff2 that dominates page weight is cached hard on second hit. Chasing a small absolute page-size
-number isn't the meaningful signal for this site — catching a PR that silently doubles the payload is.
+number isn't the meaningful signal for this site; catching a PR that silently doubles the payload is.
 
 | Tier                                   | Budget          | Enforcement                                                                     |
 | -------------------------------------- | --------------- | ------------------------------------------------------------------------------- |
-| Client JS — target                     | ≤ 5 KB gzipped  | Build script asserts total of `dist/js/*.js` + inline `theme-init`. PR review.  |
-| Client JS — hard ceiling               | ≤ 20 KB gzipped | Build script fails if exceeded.                                                 |
-| Initial page payload — regression gate | ≤ 2 MB          | `.lighthouserc.json` fires `resource-summary:total:size` error. Merge-blocking. |
+| Client JS (target)                     | ≤ 5 KB gzipped  | Build script asserts total of `dist/js/*.js` + inline `theme-init`. PR review.  |
+| Client JS (hard ceiling)               | ≤ 20 KB gzipped | Build script fails if exceeded.                                                 |
+| Initial page payload (regression gate) | ≤ 2 MB          | `.lighthouserc.json` fires `resource-summary:total:size` error. Merge-blocking. |
 
 "Initial page payload" is HTML + CSS + fonts + JS + any inline SVG on the first render. Total homepage budget is 2 MB.
-PRs that legitimately add payload bump the ceiling in the same commit — explicit movement, recorded in the diff.
+PRs that legitimately add payload bump the ceiling in the same commit: explicit movement, recorded in the diff.
 
 Client-JS ceilings stay tight (5 KB / 20 KB) because JS bytes run on the main thread and dominate interaction latency in
-a way raw page weight does not. The shipped `theme.js` + `clipboard.js` are ~2.5 KB combined gz — room to spare.
+a way raw page weight does not. The shipped `theme.js` + `clipboard.js` are ~2.5 KB combined gz, with room to spare.
 
 Font-subsetting Monaspace to Latin glyphs is tracked as TODOS P3. When it lands, drop the page-payload ceiling to match
 the new reality (~200 KB + 60 KB headroom ≈ 260 KB).
@@ -1044,12 +1045,12 @@ Accessibility: the toggle is a `<button>` group with `aria-pressed`, keyboard-na
 - `<article>` at `max-inline-size: 68ch`, horizontally centered.
 - Page padding: `--space-5` mobile, `--space-7` desktop.
 - **Header (Terse package)**: wordmark `agentnative` on the left in Uncut Sans Semibold; no link (already on the
-  homepage — clicking the wordmark scrolls to `#` / top). Three utility links right-aligned: `/check`, `/about`,
+  homepage, so clicking the wordmark scrolls to `#` / top). Three utility links right-aligned: `/check`, `/about`,
   `llms.txt`. The `llms.txt` link carries `title="Machine-readable index for AI agents"` for hover context. No nav tree,
   no version pill, no search. Header is one line tall; does not duplicate the mini-TOC's role.
 - **Footer (Terse package)**: a single line. `v0.1 · 2026-04-14 · source on GitHub`. Separator is a middot (`·`) with
   tabular-figure spacing. Version uses `font-variant-numeric: tabular-nums` so cross-version renders align. No personal
-  attribution on the spec site — the spec-is-bigger-than-the-author stance; `davies.fyi` owns the named surface.
+  attribution on the spec site: the spec-is-bigger-than-the-author stance; `davies.fyi` owns the named surface.
 - **Mini-TOC ships on desktop ≥ 1100px** (resolving open question 5.2). Sticky right-rail `<aside>` in a 2-column grid
   with the article. Lists the 7 principle anchors. Collapses to an inline `<nav>` at the top of the article below
   1100px. Always visible in one form or the other.
@@ -1082,8 +1083,8 @@ The card design landed on 2026-04-30 via a `/design-shotgun` → `/impeccable` 
 - Dark-mode background (`--bg`).
 - Brand row at top, 28pt: `anc.dev` (mono Monaspace Xenon, `--accent`) + 32×1px rule + "the agent-native CLI standard"
   (sans Uncut Sans, `--fg-muted`).
-- Manifesto block, 60pt (display Uncut Sans), `align-self: end` — sits in the lower half with breathing room above.
-  Three lines, each governed by a color-coded RFC 2119 keyword (700-weight) followed by a 500-weight verb-phrase:
+- Manifesto block, 60pt (display Uncut Sans), `align-self: end`, sits in the lower half with breathing room above. Three
+  lines, each governed by a color-coded RFC 2119 keyword (700-weight) followed by a 500-weight verb-phrase:
 - **MUST** run without prompting. *(P1's foundational rule, `--must`)*
 - **SHOULD** speak machine-first. *(P2's worldview, `--should`)*
 - **MAY** decorate when a TTY is open. *(P1's safety valve, `--may`)*
@@ -1094,20 +1095,20 @@ The card design landed on 2026-04-30 via a `/design-shotgun` → `/impeccable` 
 - `og:image:alt` + `twitter:image:alt` are emitted by `src/build/shell.mjs` (`OG_IMAGE_ALT` constant): "agent-native CLI
   standard — anc.dev — seven principles for CLIs that agents can operate".
 
-**Future extension — per-page OG cards.** The current model uses one shared `/og-image.png` for every page. The
-generator architecture (`Playwright → Sharp → palette PNG`, deterministic, foundation-token-driven) is reusable for
-per-principle (`/p1`-`/p7`) and per-scorecard (`/scorecards/<tool>`) cards when those land. Concrete extension shape:
-`bun run og --input scripts/og/og-<kind>.html --output public/og/<slug>.png --data k=v,k=v` — the script's existing
-`[data-version]` injection seam already prefigures the data-injection step. Out of scope for v0.1; tracked in the plan's
-"Deferred to Separate Tasks" alongside favicon decoupling.
+**Future extension: per-page OG cards.** The current model uses one shared `/og-image.png` for every page. The generator
+architecture (`Playwright → Sharp → palette PNG`, deterministic, foundation-token-driven) is reusable for per-principle
+(`/p1`-`/p7`) and per-scorecard (`/scorecards/<tool>`) cards when those land. Concrete extension shape: `bun run og
+--input scripts/og/og-<kind>.html --output public/og/<slug>.png --data k=v,k=v`. The script's existing `[data-version]`
+injection seam already prefigures the data-injection step. Out of scope for v0.1; tracked in the plan's "Deferred to
+Separate Tasks" alongside favicon decoupling.
 
 ### 4.14 Schema.org / SEO surface
 
 Schema.org `TechArticle` JSON-LD in `<head>` per page, `isPartOf` pointing to a parent `TechArticle` for the full spec.
 Per the agent-native documentation surface pattern. Twitter card `summary_large_image`. Open Graph: `og:title`,
-`og:description`, `og:image`, `og:url`, `og:type="article"`, `article:published_time`, `article:modified_time`. Pre-
-purchase domain: canonical + og:url use the staging `workers.dev` host until production cut-over; swap via a single
-constant in the HTML shell. Documented in `wrangler.toml` per CEO plan (resolving open question 5.6 — "yes").
+`og:description`, `og:image`, `og:url`, `og:type="article"`, `article:published_time`, `article:modified_time`. Before
+domain purchase: canonical + og:url use the staging `workers.dev` host until production cut-over; swap via a single
+constant in the HTML shell. Documented in `wrangler.toml` per CEO plan (resolving open question 5.6 with "yes").
 
 ## 5. Open questions for Brett — status
 
@@ -1120,7 +1121,7 @@ Revision 1 open questions with current status:
    narrative); full palette and contrast in `docs/research/design/color-analysis.md`.
 4. **Colorize MUST / SHOULD / MAY** → RESOLVED: ship option 7b (inline keyword color only). The stronger 7b-plus
    side-stripe variant was rejected per impeccable's `<absolute_bans>` (border-left >1px on callouts is the #1 AI-slop
-   pattern). Block-level alternatives — leading RFC tag, background-tint fill — deferred to live-site iteration (§4.7).
+   pattern). Block-level alternatives (leading RFC tag, background-tint fill) deferred to live-site iteration (§4.7).
    Preview at `docs/research/design/must-should-may-preview.html`.
 5. **`llms.txt` link in header** → RESOLVED: ship it, recall if it reads cute (§4.11).
 6. **`og:url` pre-domain purchase** → RESOLVED: stage on `workers.dev` host, swap constant at cutover (§4.14).
@@ -1131,12 +1132,12 @@ Revision 1 open questions with current status:
   single `<button>` that cycles, smaller footprint, slightly worse discoverability. Default ships the three-button
   pattern; swap is trivial.
 - **MUST/SHOULD/MAY transform scope.** The remark plugin rewrites bare `MUST` / `SHOULD` / `MAY` in certain block
-  contexts (§4.7). Question: should it also run inside code comments? Current answer: no — highlighted code should not
-  acquire new colors from outside the syntax-highlight theme. Worth re-confirming during implementation.
+  contexts (§4.7). Question: should it also run inside code comments? Current answer: no, because highlighted code
+  should not gain new colors from outside the syntax-highlight theme. Worth re-confirming during implementation.
 - **Header `llms.txt` link label.** "llms.txt" as bare text (literal wink) vs. "for agents" (explanatory). Default ships
   bare-text; `title="Machine-readable index for AI agents"` on hover.
-- **`X-Robots-Tag: noindex` on `.md` variant.** Mintlify ships this and it is probably correct — we do not want search
-  engines to index the markdown (duplicate content). Proposal: ship it. Confirm in `/plan-eng-review`.
+- **`X-Robots-Tag: noindex` on `.md` variant.** Mintlify ships this and it is probably correct, because search engines
+  should not index the markdown (duplicate content). Proposal: ship it. Confirm in `/plan-eng-review`.
 
 ## 6. Sources
 
diff --git a/PRODUCT.md b/PRODUCT.md
index f1b4fbd..dac7f8c 100644
--- a/PRODUCT.md
+++ b/PRODUCT.md
@@ -7,7 +7,7 @@ Channel-specific product + design context for the **site channel** of agentnativ
 
 The site channel sits in a three-tier waterfall. Each tier owns a different concern; nothing duplicates.
 
-1. **Universal — [`BRAND.md`](BRAND.md).** Shared identity, voice anchor, audiences, universal anti-patterns. Vendored
+1. **Universal: [`BRAND.md`](BRAND.md).** Shared identity, voice anchor, audiences, universal anti-patterns. Vendored
    from [`agentnative-spec/BRAND.md`](https://github.com/brettdavies/agentnative/blob/main/BRAND.md) alongside the
    prose-check stack via [`scripts/sync-prose-tooling.sh`](scripts/sync-prose-tooling.sh) (re-run after any
    agentnative-spec release that touches the universal voice). Applies across every channel (spec, site, linter, skill
diff --git a/README.md b/README.md
index cb6e3e5..72bd7f9 100644
--- a/README.md
+++ b/README.md
@@ -1,29 +1,117 @@
 # agentnative-site
 
-Source for the agent-native CLI standard website. Presents the 7 principles for building CLI tools that AI agents can
-operate as first-class users.
+Source for [anc.dev](https://anc.dev), the public surface for the agent-native CLI standard. The site publishes the
+eight principles of the standard, the ANC 100 leaderboard, per-tool curated scorecards, a live-scoring form, the score
+badge surface, and the agent-native-cli skill bundle distribution endpoint.
 
-## Development
+## What it serves
 
-Static HTML + CSS. No build step. Open `index.html` in a browser.
+| Route                                                               | Purpose                                                                 |
+| ------------------------------------------------------------------- | ----------------------------------------------------------------------- |
+| `/`                                                                 | Homepage with the principle index and a live-score form to `/api/score` |
+| `/scorecards`                                                       | The ANC 100 leaderboard (every curated tool, sortable)                  |
+| `/score/<tool>`                                                     | Per-tool curated scorecards (renders from `scorecards/<tool>.json`)     |
+| `/score/live/<binary>`                                              | Shareable live-score result pages backed by the R2 score cache          |
+| `/check`, `/install`, `/methodology`, `/badge`, `/scorecard-schema` | Supporting pages on usage, install, scoring methodology, badge, schema  |
+| `/contribute`, `/about`                                             | Contribution map and attribution                                        |
+| `/skill`                                                            | Human-facing install for the `agent-native-cli` skill bundle            |
+| `/skill.json`                                                       | Canonical machine-primary skill manifest                                |
+| `/llms.txt`, `/llms-full.txt`                                       | llmstxt.org convention (summary index plus full concatenated spec)      |
 
-After cloning, point git at the repo's hook directory once:
+Every HTML page has a markdown twin reachable via `.md` suffix or `Accept: text/markdown` content negotiation.
+
+## Scoring
+
+Per-tool scorecards display a percent computed as `pass / (pass + warn + fail)` (skips and errors are excluded so
+inapplicable checks do not drag the score). MUST-tier misses count as `fail`; SHOULD- or MAY-tier misses count as
+`warn`. Badge eligibility starts at 80%. Full formula and tier mapping at [`/methodology`](https://anc.dev/methodology);
+per-field JSON schema at [`/scorecard-schema`](https://anc.dev/scorecard-schema); badge contract at
+[`/badge`](https://anc.dev/badge). The scoring engine itself lives in
+[`agentnative-cli`](https://github.com/brettdavies/agentnative-cli).
+
+## Stack
+
+Cloudflare Worker over Static Assets. Build pipeline renders markdown in `content/` to HTML at `dist/` via `bun
+src/build/build.mjs`. Live scoring runs in a Cloudflare Sandbox Durable Object, cached in R2, rate-limited by KV, and
+gated by Turnstile. Full inventory in [`wrangler.jsonc`](./wrangler.jsonc); design contract in
+[`DESIGN.md`](./DESIGN.md).
+
+## Local development
+
+```bash
+bun install
+bun run build              # produces dist/
+bun run dev                # bun run build && wrangler dev --local
+wrangler dev --env staging # local Worker against staging bindings
+bun test                   # unit + regression
+bun run test:e2e           # Playwright
+```
+
+After cloning, point git at the repo's hooks once:
 
 ```bash
 git config core.hooksPath scripts/hooks
 ```
 
-This enables `scripts/hooks/pre-push`, which runs `bun run lint`, `bun run build`, `bun test`, and `wrangler deploy
---dry-run` before every push — the same gates CI enforces. Bypass intentionally with `git push --no-verify` if you
-really need to (rare; the hook exists to catch what we've lost time to before).
+This enables `scripts/hooks/pre-push`, which runs the seven local gates before every push: `lint`, `build`, `tests`,
+`wrangler deploy --dry-run`, pack-README drift, banned-fonts scan, and `scripts/prose-check.sh` (Vale plus LanguageTool
+when reachable). CI enforces stages 1 through 4; stages 5 through 7 are pre-push only.
+
+## Branch and release model
+
+Feature branches PR to `dev`. Production cuts via `release/<YYYY-MM-DD>-<slug>` cherry-picked from `dev` to `main`.
+`deploy.yml` ships `main` to `anc.dev` on push. The full procedure lives in [`RELEASES.md`](./RELEASES.md); rationale in
+[`RELEASES-RATIONALE.md`](./RELEASES-RATIONALE.md).
 
-## Deployment
+## Documentation map
+
+| File                                                                                | Purpose                                                       |
+| ----------------------------------------------------------------------------------- | ------------------------------------------------------------- |
+| [`AGENTS.md`](./AGENTS.md)                                                          | Agent-facing project brief: scope, voice, structure, surfaces |
+| [`CONTRIBUTING.md`](./CONTRIBUTING.md)                                              | Contribution tiers, dev setup, pre-push hook, PR conventions  |
+| [`DESIGN.md`](./DESIGN.md)                                                          | Visual and structural design contract                         |
+| [`BRAND.md`](./BRAND.md)                                                            | Voice, positioning, naming                                    |
+| [`PRODUCT.md`](./PRODUCT.md)                                                        | Product framing and roadmap context                           |
+| [`RELEASES.md`](./RELEASES.md) / [`RELEASES-RATIONALE.md`](./RELEASES-RATIONALE.md) | Release runbook plus its reasoning                            |
+| [`docs/runbooks/`](./docs/runbooks/)                                                | Operational runbook set (live-scoring, analytics, deploy)     |
+
+## Related repositories
+
+- [agentnative](https://github.com/brettdavies/agentnative): the canonical spec (principle text, pressure-tests,
+  versioning policy)
+- [agentnative-cli](https://github.com/brettdavies/agentnative-cli): `anc`, the CLI linter and scoring engine, plus the
+  tool registry
+- [agentnative-skill](https://github.com/brettdavies/agentnative-skill): the `agent-native-cli` skill bundle, installed
+  via [anc.dev/skill](https://anc.dev/skill)
+
+## Contributing
+
+Three shapes of contribution, in order of cost:
+
+1. **Signal** (site bug, rendering issue, broken link, copy critique, mobile-layout or performance regression): file an
+   issue with the matching template at
+   [github.com/brettdavies/agentnative-site/issues/new/choose](https://github.com/brettdavies/agentnative-site/issues/new/choose).
+2. **Proposal** (new page or section, scorecard renderer rework, Worker route addition, build-pipeline change,
+   live-scoring surface change): open a design issue first; the maintainer signs off before code lands.
+3. **Code**: PR against `dev` (per branch discipline).
+
+Local setup:
+
+```bash
+git clone https://github.com/brettdavies/agentnative-site
+cd agentnative-site
+git config core.hooksPath scripts/hooks  # mirror CI locally on every push
+bun install
+bun run build
+bun test
+```
 
-Cloudflare Workers. Pushes to `main` deploy automatically.
+The full tier breakdown, pre-push hook contents, and PR conventions live in [`CONTRIBUTING.md`](./CONTRIBUTING.md).
+Cross-repo routing: principle-level discussion (MUST/SHOULD/MAY tier changes, new principles, applicability clauses)
+goes to the [spec repo](https://github.com/brettdavies/agentnative/issues/new/choose); scoring-engine and registry work
+to [agentnative-cli](https://github.com/brettdavies/agentnative-cli/issues/new/choose); skill bundle changes to
+[agentnative-skill](https://github.com/brettdavies/agentnative-skill/issues/new/choose).
 
-## Related
+## License
 
-- [agentnative-cli](https://github.com/brettdavies/agentnative-cli) — the CLI linter that checks compliance with this
-  standard
-- [agentnative-skill](https://github.com/brettdavies/agentnative-skill) — the agent-native-cli skill bundle (SKILL.md +
-  checklists + scripts) installed via [anc.dev/skill](https://anc.dev/skill)
+See [`LICENSE`](./LICENSE).
diff --git a/RELEASES-RATIONALE.md b/RELEASES-RATIONALE.md
new file mode 100644
index 0000000..e95c554
--- /dev/null
+++ b/RELEASES-RATIONALE.md
@@ -0,0 +1,315 @@
+# Architecture decisions
+
+Companion to [`RELEASES.md`](./RELEASES.md). RELEASES.md is the runbook (commands, paths, decision tables). This file
+holds the WHY behind those rules: branching model, PR conventions, CI design, deploy filter logic, sandbox-image
+lifecycle, status-check pitfalls.
+
+Read this when:
+
+- A rule in RELEASES.md doesn't make sense and you're tempted to change it.
+- A new contributor asks "why do we do X this way".
+- You're adding a new release-flow rule and need to know where it fits the existing model.
+
+## Branching model
+
+### Forever `dev`, ephemeral release branches
+
+`dev` is never deleted, even after a release. The next release cycle reuses the same `dev`. The repo's
+`deleteBranchOnMerge: true` setting doesn't touch `dev` as long as `dev` is never the head of a PR. Using a short-lived
+`release/*` head is what keeps the setting compatible with a forever integration branch.
+
+Engineering docs (`docs/plans/`, `docs/solutions/`, `docs/brainstorms/`) live on `dev` only. They never reach `main`.
+`guard-main-docs.yml` blocks them from PRs targeting `main`, and `guard-release-branch.yml` rejects any PR to main whose
+head isn't `release/*`.
+
+### Why cherry-pick from `main`, not branch from `dev`
+
+Branching from `dev` and then `git rm`-ing the guarded paths seems simpler but produces `add/add` merge conflicts
+whenever `dev` and `main` have diverged (which they always do after the first squash merge). The file appears as "added"
+on both sides with different content. Always branch from `origin/main` and cherry-pick the dev commits onto it.
+
+### CalVer release branches
+
+Branch naming `release/<YYYY-MM-DD>-<slug>` (mandatory) makes release branches sortable and unambiguous when multiple
+cuts are in flight. The date prefix is the planned merge date, not the cut date; re-naming on slip is allowed but not
+required. Slug is kebab-case, short, descriptive (3-6 words). Bare `release/<slug>` (no date prefix) is no longer
+permitted.
+
+The `guard-release-branch.yml` workflow currently enforces the `release/` prefix on PRs targeting `main`; the CalVer
+date prefix is convention-enforced via review and the runbook. Tightening the workflow regex to require
+`^release/\d{4}-\d{2}-\d{2}-` is a tracked follow-up.
+
+## PR body conventions
+
+### No explainer prose in the body
+
+Every section of a PR body is user-facing substance only: what is changing for the consumer that was not already there.
+Workflow mechanics (cherry-pick, regenerate, pre-push gate, CI behavior) is documented in RELEASES.md and `.github/`,
+NOT in the PR body. Triple-diff output, pre-push gate results, CI check status, exclusion rationale, and other
+verification artifacts stay local; anomalies get fixed before push, not audit-trailed in the body.
+
+The PR body is read by humans reviewing what shipped. Workflow mechanics and tool-fix provenance are noise from that
+perspective; they belong in this file, the script outputs, and the commit history respectively.
+
+### Why `feat`/`fix` are preferred over `chore`
+
+`cliff.toml` skips `^chore` (and `^style` / `^test` / `^ci` / `^build`) regardless of body content. Mistyping a
+user-facing change as `chore` silently strips it from release notes. Prefer `feat` / `fix` when the change has any
+user-observable effect (config defaults, env vars, default behaviors).
+
+### Why required-when-empty sub-headers
+
+`Related Issues/Stories` has four labels (`Story:` / `Issue:` / `Architecture:` / `Related PRs:`). `Files Modified` has
+four sub-headers (`Modified` / `Created` / `Renamed` / `Deleted`). All four must appear in every PR, even when empty;
+write `- None.` or `n/a` rather than deleting the label. Reason: scanners and humans both rely on a known section shape.
+Conditionally-absent sections force every reader to mentally check "did the author skip this or does it not apply?"
+
+### Why no AI attribution
+
+`Co-Authored-By: Claude …`, `🤖 Generated with [Claude Code]`, or any similar AI-attribution trailer is banned from
+commit messages and PR bodies. Commits and PRs stand on their own technical content. Attribution trailers are noise and
+they age poorly as tools shift.
+
+### Why no hard line wraps
+
+Author each paragraph and each bullet as one logical line, however long. GitHub soft-wraps for display. Hard wraps
+within prose produce visible mid-sentence breaks in some renderers and interfere with the prose-check pipeline: Vale's
+line-anchored output reports findings against split lines, LanguageTool's input handling can choke on certain
+control-char interactions. The auto-format hook skips `/tmp/` paths so the body keeps its authored shape; don't undo
+that with manual wrapping during composition. Same rule applies to commit messages composed via heredoc.
+
+### Why release-PR bodies repeat changelog entries from upstream PRs
+
+The release PR carries the same `### Added` / `### Changed` / `### Fixed` bullets as the feature PRs it cherry-picks.
+The repetition is intentional and harmless: `cliff.toml`'s `^release` skip prevents the release-PR squash commit from
+being double-counted in any future regeneration.
+
+### Why internal-tooling commits don't appear in `## Changelog`
+
+`chore(cliff): ...`, `chore(prose-check): ...`, and similar internal tooling commits don't appear in the PR body's `##
+Changelog`. They are not user-facing. They belong in commit history and in the Files Modified / Key Details sections of
+the PR body, not in the source-of-truth release notes.
+
+## Triple-diff verification
+
+The release-PR procedure runs three diffs (A: main→release, B: release→dev for non-doc paths, C: dev→main) plus a
+patch-id cherry check. This is belt-and-suspenders because missed cherry-picks have shipped to `main` on this and
+sibling repos before, and the file-level diff in B alone doesn't catch the patch-id false-negative class.
+
+### Why patch-id cherry-check output is noisy
+
+In a squash-merge workflow, `git cherry HEAD origin/dev` produces many `+` lines that need human triage. They do NOT
+auto-block the release. Expected sources of false positives:
+
+1. **Historical commits squash-merged in prior releases.** The squash commit on main has a different patch-id than the
+   dev commits it consolidates, so old commits show as `+` forever. Anything older than the previous release tag is
+   almost always this.
+2. **Cherry-picks where conflict resolution stripped guarded paths** (`docs/plans/`, `docs/brainstorms/`, etc.) or
+   otherwise altered the tree. Same source-code intent, different patch-id.
+3. **Intentionally skipped commits**: docs-only commits, release-prep backports, revert-and-redo prep steps.
+
+A real miss looks like: a recent feat/fix/chore commit on dev whose *file content* is not yet on main. To triage a `+`
+line:
+
+```bash
+git show <sha> --stat                       # what did it touch?
+git diff origin/main..HEAD -- <those-files> # already on release?
+```
+
+If every touched file is guarded (`docs/plans/`, `docs/brainstorms/`, etc.) OR the content is already on main via a
+prior squash, it's a false positive: no action. Otherwise cherry-pick the commit and re-run the triple-diff.
+
+## Prose scrubbing scope
+
+Pre-push covers `*.md` files in the repo via Vale + LanguageTool. Three release-flow artifacts live outside that net and
+need a manual scrub before they ship:
+
+- **PR bodies**: `gh pr create` and `gh pr edit` send body text directly to GitHub; pre-push has no reach there.
+- **Release-PR bodies**: the `release/*` PR to `main` carries contributor-authored wrap-up text composed after the
+  cherry-picks land, and the same out-of-repo gap applies.
+- **Any future generated changelog**: if a `CHANGELOG.md` flow lands here, it inherits whatever prose its upstream PR
+  bodies carry.
+
+Scrub-before-submit (author in `/tmp/`, scrub there, submit via `--body-file`) avoids the round-trip of "submit, scrub,
+edit, scrub again". Every fix lands locally and the public PR sees only clean text. The auto-format hook skips `/tmp/`
+paths so the body keeps its authored shape and no soft-wrapping is injected.
+
+For a future generated-changelog finding, fix the upstream PR body (which the regeneration script re-fetches every run)
+and regenerate. Hand-editing the generated artifact directly produces drift the next regeneration overwrites.
+
+## Docs-only deploy filter
+
+The `paths-ignore` filter on the `push` trigger skips deploy when a commit only touches paths the build doesn't ingest.
+The filter is symmetric across `dev` and `main`. In practice the `main` side is mostly theoretical:
+`guard-main-docs.yml` already blocks `docs/plans|solutions|brainstorms|reviews/**` from reaching `main` via PR, and the
+remaining ignored paths (root `*.md`, `DESIGN.md`, `docs/TODOS.md`) don't change build output; wrangler would redeploy a
+bit-identical Worker.
+
+If a future case needs unconditional main-branch deploys, swap the workflow-level filter for a job-level changed-files
+check. The `workflow_dispatch` trigger is unaffected by `paths-ignore`, so manual redeploys always work regardless of
+what changed.
+
+## Sandbox image releases
+
+### Soak-then-promote default
+
+Most image changes go through a staging-soak cycle before reaching production. This protects prod from any sandbox
+regression that only surfaces under real install traffic. The two `wrangler.jsonc` image pins (top-level prod,
+`env.staging.containers[0].image`) are independent CF resources with separate version histories; they may legitimately
+differ during a soak.
+
+After merge to dev, CI deploys `agentnative-site-staging` to the new image. Soak: observability, integration tests, real
+traffic on the staging.workers.dev URL. When the image is ready to ship, a release PR adds one promotion commit that
+bumps the top-level pin to match staging.
+
+### Lockstep-bump shortcut (low-risk only)
+
+For image changes that don't need a soak (base-image security patch, dependency-only update with no behavior delta),
+update BOTH pins in the same feat PR. The dev-targeting PR has equal pins from the start; the eventual release PR
+carries equal pins; staging and prod deploy the new image in lockstep. The CI guard accepts this because both pins exist
+in the registry on every PR.
+
+Use the soak-then-promote default for any change that touches sandbox behavior: package manager additions, runtime
+version bumps, `anc` upgrades, `cargo-binstall` upgrades, anything in `docker/sandbox/Dockerfile` past the base-image
+FROM line.
+
+### Deploy never rebuilds
+
+`wrangler deploy --env staging` (and `wrangler deploy` on main) against the fully-qualified registry URI does NOT
+trigger a rebuild. The image was already published during the local `wrangler containers build -p` step. Build is
+decoupled from deploy: a Worker code-only deploy never rebuilds the image, and an image-only release never reships
+Worker code unintentionally.
+
+### Image-retention discipline
+
+NEVER delete a tag from the CF managed registry that backed a shipped Worker version. Deletion silently breaks `wrangler
+rollback` for any version that referenced the image (per
+[Containers Limits](https://developers.cloudflare.com/containers/platform-details/limits/)). The 50 GB account-wide cap
+is a quarterly prune review, not a routine cleanup. When a release tag ships, record the pair `<git-tag> <-> <registry
+URI>` in the release commit body so the inventory survives.
+
+Retention is what makes soak-then-promote safe: while a new image is soaking on staging, the prod pin still references
+the previous release's tag, and that tag must remain in the registry for prod to keep serving.
+
+### DO migrations are one-way walls
+
+The first Worker version that applied `migrations[].new_sqlite_classes: ["Sandbox"]` (`v1`) cannot be rolled back across
+that boundary via `wrangler rollback` (per
+[Versions and deployments / Rollbacks](https://developers.cloudflare.com/workers/configuration/versions-and-deployments/rollbacks/)).
+Treat DO-migration commits as milestone releases that get an explicit reviewer note.
+
+The only path past the wall is a follow-up migration with `deleted_classes: ["Sandbox"]` on a Worker version that no
+longer references the DO binding. The Cloudflare platform destroys the durable storage attached to the deleted class as
+part of applying the migration; this is a platform behaviour, not a project choice. The R2 score cache survives because
+it lives on a separate binding (`SCORE_CACHE`). The kill-switch flag (`SCORE_KV.scoring_disabled`) and rate-limit
+counters live on KV / rate-limit bindings and are also untouched. The only data loss is whatever the `Sandbox` DO's
+SQLite storage held at the moment the `deleted_classes` migration applies.
+
+Cross-migration recovery is therefore non-trivial: applying the rollback once destroys data, and re-introducing the
+`Sandbox` class later requires a fresh migration tag (`v3-restore-sandbox` or similar; `v1` cannot be reused). The
+rehearsal exists to prove the sequence works on staging before any prod cut depends on it, and to make the data-loss
+cost concrete (the evidence table in `RELEASES.md` records DO instance counts at each step). Skipping the rehearsal is
+what makes the first prod incident unrecoverable in practice.
+
+### GHA fallback
+
+If a local build is impossible, set `image:` to a Dockerfile path (`./docker/sandbox/Dockerfile`) and let
+`cloudflare/wrangler-action` build inline on `ubuntu-latest` (~60-130s cold per deploy; no GHA-side layer cache; push is
+auto-skipped when the existing tag still matches). This is a fallback, not the primary path; the local-build-once flow
+is what the deploy workflow assumes.
+
+### R2 score-cache lifecycle
+
+Plan U7 caches successful live scorecards under `scores/{binary}/{anc-version}.json` in the `SCORE_CACHE` R2 bucket. A
+7-day lifecycle rule reaps stale entries at the bucket level rather than per-write, keeping `Cache-Control: public,
+max-age=300` on every object so CDN edges don't over-cache while the R2 origin holds the long TTL.
+
+The rule name (`scores-7day-ttl`) identifies the rule for future updates or removal. The prefix (`scores/`) scopes the
+TTL so future writes under a different prefix in the same bucket are NOT affected. If a future change adds a new prefix
+(e.g., `audit-logs/`), set up a matching lifecycle rule for it deliberately rather than broadening this one.
+
+The `tests/wrangler-config.test.ts` drift-guard scans the RELEASES.md section for the exact literal command so a future
+regression on the syntax surfaces in CI.
+
+### Post-deploy smoke scope
+
+The CI smoke step after staging deploys (`/api/score` with the `ripgrep` slug, asserting the response triad and
+`registry_hit`) deliberately covers the registry-fast-path only. The motivation is two-sided.
+
+First, the registry-fast-path is the surface every other branch of the pipeline depends on: if the curated
+`/api/score?input=ripgrep` doesn't return `{ scorecard, spec_version, site_spec_version, anc_version, checker_url }`, no
+downstream tier works either. Failing the deploy on this surface catches the broadest class of regressions in the
+shortest run time.
+
+Second, the alternative branches each carry a cost the smoke step shouldn't pay on every deploy. Live-sandbox dispatch
+spins a Container instance and pulls from package registries; doing it on every staging deploy multiplies billable
+egress and surfaces ecosystem-dependency flakes as red builds. Gate behaviour (Turnstile bouncing, rate-limit
+exhaustion) is a contract assertion that lives in unit tests, where it can be exercised deterministically without
+external infrastructure. The opt-in `homepage-score-live` e2e suite covers the live round-trip on a manual / nightly
+cadence where the cost and the latency are acceptable.
+
+The smoke is therefore a high-leverage tripwire, not a full pipeline test. When it fails, the deploy is wrong; when it
+passes, the deploy is at least serving the response triad to a curated input, not a proof that the live path works.
+
+## CI workflow split
+
+### Why the stub workflow exists
+
+Required status checks + `paths-ignore` is a known
+[GitHub Actions sharp edge](https://docs.github.com/en/actions/using-jobs/using-conditions-to-control-job-execution): if
+the workflow is filtered out for a given PR, the required check shows as "Expected" forever and the PR can't merge. The
+stub workflow (`ci-stub.yml`) fires exactly when `ci.yml` is filtered, emits the same check context as a no-op success,
+and unblocks the PR.
+
+### The paths-ignore invariant
+
+`ci.yml`'s `paths-ignore:` list and `ci-stub.yml`'s `paths:` list must stay identical. Drift creates gaps (no workflow
+fires → required check never reports → PR stuck) or benign double-runs on mixed PRs. A comment in both files calls this
+out explicitly.
+
+### Status-check context strings
+
+The `required_status_checks[].context` strings in `protect-main.json` must match exactly what GitHub publishes for each
+check:
+
+- **Inline job** (with `name:` field): published as just `<job-name>` (no workflow-name prefix).
+- **Reusable-workflow caller** (`uses: .../foo.yml@ref`): published as `<caller-job-id> / <reusable-job-id-or-name>`.
+
+Mixing these produces a stuck-but-green PR: all actual checks report green, but the ruleset waits forever on a context
+that will never appear. Confirm the real contexts after a first CI run with:
+
+```bash
+gh api repos/brettdavies/agentnative-site/commits/<sha>/check-runs --jq '.check_runs[].name'
+```
+
+## Visual-fidelity gates
+
+Two visual-regression rules apply to any change touching CSS, layout, or rendered output: a "browser-verify before done"
+agent-side rule (working today) and a Playwright snapshot diff in CI (planned, deferred until the design system
+stabilizes). Both live in [`AGENTS.md` § Visual fidelity](./AGENTS.md#visual-fidelity), the source of truth. A release
+that didn't satisfy those gates upstream isn't unblocked by the CI pipeline being green.
+
+## Skill releases
+
+`/skill.json` and `/skill` advertise the `agent-native-cli` skill, hosted at
+[`brettdavies/agentnative-skill`](https://github.com/brettdavies/agentnative-skill). This site vendors the skill's
+manifest (per-host install commands, version, surface metadata) in `src/data/skill.json`; the skill repo holds the
+actual content. Surface contract in `DESIGN.md` §3.9.
+
+Update detection at install sites is delegated to the skill bundle's `bin/check-update`, which compares the local
+bundle's `VERSION` against `main` on GitHub.
+
+The skill repo's branch model: `main` is the published-release pointer (default branch); `dev` is the integration
+branch. The bare `git clone --depth 1` in each install command lands on `main`, so each release requires the skill
+maintainer to fast-forward `main` to the new tag.
+
+The cache-purge step after a manifest bump exists because users see the manifest via `/skill.json` (24 h `s-maxage`);
+without a purge they'd pick up the old shape for a day. The first-deploy-after-rename note (cutover from `/install*` →
+`/skill*`) is a one-time eviction so legacy cached paths don't serve stale content.
+
+## Related docs
+
+- [`RELEASES.md`](./RELEASES.md): operational runbook (commands, paths, decision tables)
+- [`AGENT.md`](./AGENT.md): onboarding, repo conventions, tool-site sequencing
+- [`DESIGN.md`](./DESIGN.md): design system and build contract
+- [`docs/TODOS.md`](./docs/TODOS.md): deferred work (not in v0 scope)
diff --git a/RELEASES.md b/RELEASES.md
index 340cbf1..87af4e7 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -1,7 +1,6 @@
 # Releasing agentnative-site
 
-Every change reaches production via this pipeline. Direct commits to `dev` or `main` are not permitted — every change
-has a PR number in its squash commit message, which keeps the history scannable and attributable.
+Operational runbook. Rationale lives in [`RELEASES-RATIONALE.md`](./RELEASES-RATIONALE.md).
 
 ```text
 feature branch → PR to dev (squash merge)
@@ -11,13 +10,10 @@ feature branch → PR to dev (squash merge)
               → deploy.yml publishes to production (anc.dev)
 ```
 
-**Exception — `docs/plans/`.** Plan documents are author-driven thinking artifacts. They don't ship to production
-(`guard-main-docs.yml` blocks `docs/plans/`, `docs/solutions/`, and `docs/brainstorms/` from reaching `main`) and they
-don't need code review. Commit them directly to `dev` with a `docs(plans):` Conventional Commits message — skip the
-feature branch entirely. The dev ruleset's admin bypass (`bypass_actor` for RepositoryRole 5) allows this without
-needing the otherwise-required CI check. The same convention applies to ad-hoc edits of `docs/brainstorms/` and
-`docs/solutions/` (the latter being a symlink to the shared `solutions-docs` repo, which has its own commit flow). Code,
-content, scripts, registry, and everything else still go through the PR pipeline above.
+Direct commits to `dev` or `main` are not permitted: every change has a PR number in its squash commit message.
+
+**Exception** for `docs/plans/`, `docs/brainstorms/`, `docs/solutions/`: commit directly to `dev` with `docs(plans):`
+(or similar) message. No feature branch, no PR. These paths never reach `main` (`guard-main-docs.yml`).
 
 ## Branches
 
@@ -25,13 +21,10 @@ content, scripts, registry, and everything else still go through the PR pipeline
 | ---------------------------- | --------------------------------------- | ------------------------------------------- | ------------------------------------ |
 | `main`                       | Production. Only release commits.       | Forever.                                    | `.github/rulesets/protect-main.json` |
 | `dev`                        | Integration. All feature PRs land here. | Forever. Never delete.                      | `.github/rulesets/protect-dev.json`  |
-| `feat/*`, `fix/*`, `chore/*` | Feature work.                           | One PR's worth. Auto-deleted on merge.      | None — squash into dev freely.       |
+| `feat/*`, `fix/*`, `chore/*` | Feature work.                           | One PR's worth. Auto-deleted on merge.      | None. Squash into dev freely.        |
 | `release/*`                  | The head of a dev → main PR.            | One release's worth. Auto-deleted on merge. | None.                                |
 
-`dev` is a **forever branch**. Never delete it locally or remotely, even after a `release/* → main` merge. The next
-release cycle reuses the same `dev`. The repo's `deleteBranchOnMerge: true` setting doesn't touch `dev` as long as `dev`
-is never the head of a PR — using a short-lived `release/*` head is what keeps the setting compatible with a forever
-integration branch.
+→ Rationale: [`RELEASES-RATIONALE.md` § Branching model](./RELEASES-RATIONALE.md#branching-model).
 
 ## Daily development (feature → dev)
 
@@ -45,262 +38,149 @@ gh pr create --base dev --title "feat(scope): what changed"
 ```
 
 - **Commit style**: [Conventional Commits](https://www.conventionalcommits.org/).
-- **PR body**: follow the repo's PR template. The `## Changelog` section is the source of truth for user-facing release
-  notes. PR bodies remain editable post-merge — typos and oversights can be fixed by editing the PR on GitHub later. If
-  a generated `CHANGELOG.md` flow lands here (deferred until needed; the upstream `agentnative-spec` runs one via
-  `scripts/generate-changelog.sh` + `cliff.toml` and re-fetches PR bodies via the GitHub API on each run), the same
-  edited bodies become its source.
-- **PR body prose scrub**: `gh pr create` and `gh pr edit` send body text directly to GitHub; no local pre-push hook
-  sees it. Save the body to `/tmp/`, run Vale + LanguageTool + unslop, fix findings, then submit via `--body-file`. See
-  [§ Prose scrubbing](#prose-scrubbing).
+- **PR body**: follow `.github/pull_request_template.md`. See [§ PR body](#pr-body).
+- **PR body prose scrub**: see [§ Prose scrubbing](#prose-scrubbing).
 
 ## PR body
 
-Every PR — feature, fix, docs, release — uses `.github/pull_request_template.md` verbatim. Six sections, no inventions:
-`## Summary`, `## Changelog`, `## Type of Change`, `## Related Issues/Stories`, `## Files Modified`, `## Testing`.
-
-- **No explainer prose anywhere in the body.** Every section is user-facing substance only: what is changing for the
-  consumer that was not already there. `## Summary` is one short paragraph. Do NOT recap the workflow (cherry-pick,
-  regenerate, pre-push gate, CI behavior is documented in this file and `.github/`). Do NOT paste triple-diff output,
-  pre-push gate results, CI check status, exclusion rationale, or other verification artifacts into the body. Those stay
-  local; anomalies get fixed before push, not audit-trailed in the body.
-- **Changelog** subsections (`### Added` / `### Changed` / `### Fixed` / `### Documentation`) hold the user-facing
-  entries. The template's RULES (in the HTML comment at the top of the section) are literal: 1-5 bullets, delete empty
-  subsections entirely, each bullet starts with a verb. Prose-only edits leave the section empty or omit it.
-- **Type of Change** is one checkbox. Prefer `feat` / `fix` over `chore` when the change has any user-observable effect
-  (config defaults, env vars, default behaviors). `cliff.toml` skips `^chore` (and `^style` / `^test` / `^ci` /
-  `^build`) regardless of body content; mistyping a user-facing change as `chore` silently strips it from release notes.
-- **Related Issues/Stories** has four labels (`Story:` / `Issue:` / `Architecture:` / `Related PRs:`). All four are
-  required even when empty — write `- None.` or `n/a` rather than deleting the label.
-- **Files Modified** has four sub-headers (`**Modified:**` / `**Created:**` / `**Renamed:**` / `**Deleted:**`). All four
-  are required even when empty — `Renamed: None.` / `Deleted: None.`
-- **Internal tooling commits** (`chore(cliff): ...`, `chore(prose-check): ...`, etc.) do NOT appear in the PR body's `##
-  Changelog`. They are not user-facing.
-- **Release PRs** repeat the entries from the upstream feature PRs they cherry-pick. The repetition is intentional and
-  harmless: `cliff.toml`'s `^release` skip prevents the release-PR squash commit from being double-counted in any future
-  regeneration.
-- **No AI attribution.** Never append `Co-Authored-By: Claude …`, `🤖 Generated with [Claude Code]`, or any similar
-  AI-attribution trailer to PR bodies or commit messages. Commits and PRs stand on their own technical content.
-- **No hard line wraps.** Author each paragraph and each bullet as one logical line, however long. GitHub soft-wraps for
-  display; hard wraps within prose produce visible mid-sentence breaks in some renderers and interfere with the
-  prose-check pipeline (Vale's line-anchored output reports findings against split lines, LanguageTool's input handling
-  can choke on certain control-char interactions). The auto-format hook skips `/tmp/` paths so the body keeps its
-  authored shape — don't undo that with manual wrapping during composition. The same rule applies to commit messages
-  composed via heredoc and to any markdown that ships verbatim to GitHub.
-
-The PR body is read by humans reviewing what shipped. Workflow mechanics, verification output, and tool-fix provenance
-are noise from that perspective; they belong in this file (`RELEASES.md`), the script outputs, and the commit history
-respectively.
-
-## Releasing dev to main
+Every PR uses `.github/pull_request_template.md` verbatim. Six sections, no inventions: `## Summary`, `## Changelog`,
+`## Type of Change`, `## Related Issues/Stories`, `## Files Modified`, `## Testing`.
 
-Engineering docs (`docs/plans/`, `docs/solutions/`, `docs/brainstorms/`) live on `dev` only. `guard-main-docs.yml`
-blocks them from reaching `main`, and the `guard-release-branch.yml` workflow rejects any PR to main whose head isn't
-`release/*`. You MUST use the release-branch cherry-pick pattern:
+- **No explainer prose anywhere in the body.** User-facing substance only.
+- **Changelog** subsections (`### Added` / `### Changed` / `### Fixed` / `### Documentation`): 1-5 bullets each, delete
+  empty subsections, each bullet starts with a verb.
+- **Type of Change**: one checkbox. Prefer `feat`/`fix` over `chore` for any user-observable change.
+- **Related Issues/Stories**: four labels (`Story:` / `Issue:` / `Architecture:` / `Related PRs:`). All four required
+  even when empty (`- None.` / `n/a`).
+- **Files Modified**: four sub-headers (`Modified` / `Created` / `Renamed` / `Deleted`). All four required even when
+  empty.
+- **No AI attribution** in commits or PR bodies.
+- **No hard line wraps**: one logical line per paragraph or bullet.
 
-**Branch naming** (CalVer, mandatory): `release/<YYYY-MM-DD>-<slug>` (e.g. `release/2026-05-01-content-neg-fix`). The
-date prefix is the planned merge date, not the cut date — re-naming on slip is allowed but not required. Slug is
-kebab-case, short, descriptive (3-6 words). Bare `release/<slug>` (no date prefix) is no longer permitted; the date
-prefix is what makes release branches sortable and unambiguous when multiple cuts are in flight.
+→ Rationale: [`RELEASES-RATIONALE.md` § PR body conventions](./RELEASES-RATIONALE.md#pr-body-conventions).
 
-The `guard-release-branch.yml` workflow currently enforces the `release/` prefix only on PRs targeting `main`; the
-CalVer date prefix is convention-enforced via review and this doc. Tightening the workflow regex to require
-`^release/\d{4}-\d{2}-\d{2}-` is a tracked follow-up — until that lands, a PR with a date-less branch name will pass CI
-but should be renamed before merge.
+## Releasing dev to main
 
 ```bash
-# 1. Branch from main, NOT dev. Branching from dev causes add/add conflicts
-#    when dev and main have divergent histories (the post-squash-merge norm).
+# 1. Branch from main, NOT dev.
 git fetch origin
-git checkout -b release/<slug> origin/main
+git checkout -b release/<YYYY-MM-DD>-<slug> origin/main
 
-# 2. List the dev commits not yet on main:
+# 2. List the dev commits not yet on main.
 git log --oneline dev --not origin/main
 
-# 3. Cherry-pick the ones you want to ship. Docs commits stay on dev.
+# 3. Cherry-pick the ones to ship. Docs commits stay on dev.
 git cherry-pick <sha1> <sha2> ...
 
-# 4. Triple-diff verification — belt-and-suspenders sweep that catches both
-#    directions of drift before the release tag goes out:
-#
-#    A. main → release  (what users will see; the intended ship surface)
-#    B. release → dev   (should be empty for non-doc paths until the
-#                        bump/CHANGELOG commits land, and even then should
-#                        only list those release-prep files — anything else
-#                        is a missed cherry-pick)
-#    C. dev → main      (sanity: phantom commits dev "appears ahead" on
-#                        because cherry-pick rewrites SHAs post-squash)
-git diff origin/main..HEAD --stat                                                # A
-git diff HEAD..origin/dev --name-only | grep -v '^docs/' || echo "(none)"        # B
-git diff origin/dev..origin/main --stat | tail -5                                # C
-#
-# Re-confirm no guarded paths leaked (this caught the original miss class):
+# 4. Triple-diff verification.
+git diff origin/main..HEAD --stat                                              # A: ship surface
+git diff HEAD..origin/dev --name-only | grep -v '^docs/' || echo "(none)"      # B: no missed picks
+git diff origin/dev..origin/main --stat | tail -5                              # C: phantom-commits sanity
+
+# Re-confirm no guarded paths leaked.
 git diff origin/main..HEAD --name-only \
   | grep -E '^(docs/plans|docs/brainstorms|docs/ideation|docs/reviews|docs/solutions|\.context)' \
-  && echo "LEAKED — reset and redo" || echo "(clean — no guarded paths)"
-#
-# Patch-id cherry check — catches commits on dev that have NO patch-id
-# equivalent on release. The file-level diff in B misses this class when
-# the same content happens to land via a different commit.
-#
-# IMPORTANT: in a squash-merge workflow this output is noisy. Every '+'
-# line needs human triage — it does NOT auto-block the release. Expected
-# sources of '+' lines that are NOT real misses:
-#
-#   1. Historical commits squash-merged in prior releases. The squash
-#      commit on main has a different patch-id than the dev commits it
-#      consolidates, so old commits show as '+' forever. Anything older
-#      than the previous release tag is almost always this.
-#   2. Cherry-picks where conflict resolution stripped guarded paths
-#      (docs/plans, docs/brainstorms, etc.) or otherwise altered the
-#      tree. Same source-code intent, different patch-id.
-#   3. Intentionally skipped commits — docs-only commits, release-prep
-#      backports, revert-and-redo prep steps.
-#
-# A real miss looks like: a recent feat/fix/chore commit on dev whose
-# *file content* is not yet on main. To triage a '+' line:
-#
-#   git show <sha> --stat                       # what did it touch?
-#   git diff origin/main..HEAD -- <those-files> # already on release?
-#
-# If every touched file is guarded (docs/plans/, docs/brainstorms/, etc.)
-# OR the content is already on main via a prior squash, it's a false
-# positive — no action. Otherwise cherry-pick the commit and re-run the
-# triple-diff.
-git cherry HEAD origin/dev | grep '^+' || echo "(none — release is patch-equivalent through dev)"
-#
-# If B lists any non-docs path you didn't expect, fetch dev, identify the
-# commit (`git log dev --not origin/main`), cherry-pick it, re-run the
-# triple-diff. Missed cherry-picks have shipped to main on this and sibling
-# repos before — this step is the cheap way to catch them.
-
-# 5. Push and open the PR. The release-PR body is contributor-authored and goes
-#    directly to GitHub (no pre-push reach), so scrub it through Vale +
-#    LanguageTool + unslop before --body-file. See "Prose scrubbing" below.
-git push -u origin release/<slug>
-gh pr create --base main --head release/<slug> \
+  && echo "LEAKED — reset and redo" || echo "(clean)"
+
+# Patch-id cherry check (noisy in squash-merge workflow; triage per-line).
+git cherry HEAD origin/dev | grep '^+' || echo "(none)"
+
+# 5. Push and open PR. Scrub body in /tmp/ first.
+git push -u origin release/<YYYY-MM-DD>-<slug>
+gh pr create --base main --head release/<YYYY-MM-DD>-<slug> \
   --title "release: <summary>" --body-file /tmp/body.md
 ```
 
-When the PR merges, `deploy.yml` picks up the push to `main` and publishes to staging (see "Deploy" below). Auto-delete
-removes `release/<slug>` from the remote on merge. `dev` is untouched.
+**Branch naming** (mandatory): `release/<YYYY-MM-DD>-<slug>` (e.g. `release/2026-05-01-content-neg-fix`). Slug
+kebab-case, 3-6 words.
 
-### Why branch from main, not dev
+When the PR merges, `deploy.yml` publishes to staging. Auto-delete removes `release/<slug>` from the remote on merge.
+`dev` is untouched.
 
-Branching from `dev` and then `git rm`-ing the guarded paths seems simpler but produces `add/add` merge conflicts
-whenever `dev` and `main` have diverged (which they always do after the first squash merge). The file appears as "added"
-on both sides with different content. Always branch from `origin/main` and cherry-pick onto it.
+→ Rationale + triple-diff false-positive triage:
+[`RELEASES-RATIONALE.md` § Triple-diff verification](./RELEASES-RATIONALE.md#triple-diff-verification).
 
 ## Prose scrubbing
 
-Pre-push covers `*.md` files in the repo via Vale + LanguageTool (see U4 of
-[`docs/plans/2026-05-07-001-feat-prose-check-site-plan.md`](./docs/plans/2026-05-07-001-feat-prose-check-site-plan.md)).
-Three release-flow artifacts live outside that net and need a manual scrub before they ship:
-
-- **PR bodies.** `gh pr create` and `gh pr edit` send body text directly to GitHub; pre-push has no reach there.
-- **Release-PR bodies.** The `release/*` PR to `main` carries contributor-authored wrap-up text composed after the
-  cherry-picks land, and the same out-of-repo gap applies.
-- **Any future generated changelog.** This repo does not yet generate a `CHANGELOG.md`, but if one is added later it
-  inherits whatever prose its upstream PR bodies carry — same scrub procedure applies.
-
-**Scrub before submit.** Author and clean PR bodies in `/tmp/` first, then submit via `--body-file` once. This avoids
-the round-trip of "submit, scrub, edit, scrub again" — every fix lands locally and the public PR sees only clean text.
-The auto-format hook skips `/tmp/` paths so the body keeps its authored shape and no soft-wrapping is injected.
+Pre-push covers `*.md` files via Vale + LanguageTool. Three artifacts live outside that net and need a manual scrub:
 
-The site vendors Vale rule packs locally (brand pack + site channel + `write-good` + `proselint`) via
-`scripts/sync-prose-tooling.sh`; the procedure below uses the local `.vale.ini` directly. The canonical description of
-the rule packs and the orchestrator's blocking-category whitelist lives in the spec at
-[`~/dev/agentnative-spec/docs/architecture/voice-enforcement.md`](https://github.com/brettdavies/agentnative/blob/dev/docs/architecture/voice-enforcement.md).
+- PR bodies (`gh pr create` / `gh pr edit` send body text directly to GitHub).
+- Release-PR bodies (composed after cherry-picks land).
+- Future generated changelog (if a `CHANGELOG.md` flow lands here).
 
 ```bash
-# 1. Author or fetch the artifact in /tmp/.
-$EDITOR /tmp/body.md                                           # author from scratch (gh pr create)
-gh pr view <num> --json body --jq .body > /tmp/body.md         # fetch existing (gh pr edit)
-# cp CHANGELOG.md /tmp/body.md                                 # for changelog scrub (when one exists)
+# 1. Author or fetch in /tmp/.
+$EDITOR /tmp/body.md                                           # author from scratch
+gh pr view <num> --json body --jq .body > /tmp/body.md         # fetch existing
 
 # 2. Vale (local rule packs at error tier).
 vale --no-global --output=line --minAlertLevel=error /tmp/body.md
 
-# 3. LanguageTool (blocking categories: TYPOS|GRAMMAR|CONFUSED_WORDS, mirrors the orchestrator's whitelist).
-curl -sS -X POST "${LANGUAGETOOL_URL:-http://pool.tail42ba87.ts.net:8081}/v2/check" \
-  --data-urlencode "language=en-US" --data-urlencode "text@/tmp/body.md" \
-  | jaq '.matches[] | select(.rule.category.id | test("^(TYPOS|GRAMMAR|CONFUSED_WORDS)$"))'
+# 3. LanguageTool grammar check via lt_check (~/dotfiles/config/shell/languagetool.sh).
+#    Skips cleanly if LT is unreachable. Inspect: `lt_rules`, `lt_info`. See
+#    ~/dev/agentnative-spec/CONTRIBUTING.md § Voice enforcement for the
+#    install-vs-required nuance.
+lt_check /tmp/body.md
 
-# 4. unslop (em-dash density and AI-unique structural patterns Vale + LT do not catch).
+# 4. unslop (em-dash density + AI-unique structural patterns).
 ~/.claude/skills/unslop/scripts/score.py /tmp/body.md
 
-# 5. Apply fixes in /tmp/body.md. Re-run 2-4 until 0 blocking and unslop score is 0.
+# 5. Apply fixes in /tmp/. Re-run 2-4 until 0 blocking + unslop score 0.
 
-# 6. Submit the cleaned version once.
+# 6. Submit once.
 gh pr create --base <base> --title "..." --body-file /tmp/body.md      # new PR
 gh pr edit <num> --body-file /tmp/body.md                              # existing PR
-# (regenerate CHANGELOG.md per the repo's existing changelog flow, once one exists)
 ```
 
-For a future generated-changelog finding, fix the upstream PR body (which the regeneration script re-fetches every run)
-and regenerate. Hand-editing the generated artifact directly produces drift the next regeneration overwrites.
+→ Rationale + which artifacts need this:
+[`RELEASES-RATIONALE.md` § Prose scrubbing scope](./RELEASES-RATIONALE.md#prose-scrubbing-scope).
 
 ## Deploy
 
-`.github/workflows/deploy.yml` runs on pushes to `dev` or `main`, targeting separate Workers via wrangler environments:
+`.github/workflows/deploy.yml` runs on pushes to `dev` or `main`:
 
 | Branch | Worker                     | Domain                                             | Wrangler command                |
 | ------ | -------------------------- | -------------------------------------------------- | ------------------------------- |
 | `dev`  | `agentnative-site-staging` | `agentnative-site-staging.<subdomain>.workers.dev` | `wrangler deploy --env staging` |
 | `main` | `agentnative-site`         | `anc.dev` (custom domain, `workers_dev: false`)    | `wrangler deploy`               |
 
-The staging-host guard in `src/worker/headers.ts` adds `X-Robots-Tag: noindex` on any response served from a
-`.workers.dev` host. Production at `anc.dev` gets full indexing.
+The staging-host guard in `src/worker/headers.ts` adds `X-Robots-Tag: noindex` on `.workers.dev` hosts.
 
-Manual deploys use `workflow_dispatch` with an explicit environment picker:
+Manual deploys:
 
 ```bash
 gh workflow run deploy.yml -f environment=staging              # redeploy staging
 gh workflow run deploy.yml -f environment=production            # redeploy production
-gh workflow run deploy.yml -f environment=staging -f ref=<sha>  # deploy a specific SHA to staging
+gh workflow run deploy.yml -f environment=staging -f ref=<sha>  # specific SHA to staging
 ```
 
 ### Docs-only commits skip deploy
 
 A `paths-ignore` filter on the `push` trigger skips deploy when a commit only touches paths the build doesn't ingest:
 
-- `docs/**` — all planning, design, and solution docs.
-- Root-level `*.md` — `README.md`, `AGENTS.md`, `RELEASES.md`, `CHANGELOG.md` (the glob doesn't cross `/`, so
-  `content/*.md` pages still deploy).
+- `docs/**`: all planning, design, and solution docs.
+- Root-level `*.md`: `README.md`, `AGENTS.md`, `RELEASES.md`, `CHANGELOG.md`. The glob doesn't cross `/`, so
+  `content/*.md` pages still deploy.
 
-Everything else — `content/**`, `src/**`, `scripts/**`, workflows, `wrangler.jsonc`, `package.json`, etc. — still
-triggers a deploy on push. `workflow_dispatch` is unaffected, so manual redeploys always work regardless of what
-changed.
+`workflow_dispatch` is unaffected by `paths-ignore`.
 
-The filter is symmetric across `dev` and `main`. In practice the `main` side is mostly theoretical:
-`guard-main-docs.yml` already blocks `docs/plans|solutions|brainstorms|reviews/**` from reaching `main` via PR, and the
-remaining ignored paths (root `*.md`, `DESIGN.md`, `docs/TODOS.md`) don't change build output — wrangler would redeploy
-a bit-identical Worker. If a future case needs unconditional main-branch deploys, swap the workflow-level filter for a
-job-level changed-files check.
+→ Rationale: [`RELEASES-RATIONALE.md` § Docs-only deploy filter](./RELEASES-RATIONALE.md#docs-only-deploy-filter).
 
 ### Sandbox image releases (live-scoring)
 
-The live-scoring path uses a Cloudflare Containers binding that pins an Alpine + musl sandbox image
-(`docker/sandbox/Dockerfile`). The image lives in the Cloudflare managed registry at
-`registry.cloudflare.com/<account-id>/anc-sandbox:<git-sha>`. Build is decoupled from deploy: a Worker code-only deploy
-never rebuilds the image, and an image-only release never reships Worker code unintentionally.
+- **Base**: `python:3.12-slim-trixie` + Cloudflare Sandbox SDK + PMs (cargo-binstall, pip, uv, npm, bun, upstream Go).
+- **Image**: `registry.cloudflare.com/<account-id>/anc-sandbox:<git-sha>`. Build decoupled from deploy.
+- **Instance type**: staging `standard-2` (1 vCPU, 6 GiB RAM, 12 GB disk); prod `basic` (1/4 vCPU). Promotion: release
+  PR + soak.
+- **Rationale + version-pin matrix**:
+  [`docs/solutions/tooling-decisions/cloudflare-sandbox-python-3.12-base-2026-05-19.md`](docs/solutions/tooling-decisions/cloudflare-sandbox-python-3.12-base-2026-05-19.md).
 
 `wrangler.jsonc` holds TWO independent image pins:
 
-- `containers[0].image` (top-level) is the PRODUCTION pin. The `agentnative-site` Worker on `anc.dev` deploys from this
-  tag. Advances only at release time.
-- `env.staging.containers[0].image` is the STAGING pin. The `agentnative-site-staging` Worker on
-  `agentnative-site-staging.brettdavies.workers.dev` deploys from this tag. Advances independently during development.
+- `containers[0].image` (top-level) = PRODUCTION pin. Advances only at release time.
+- `env.staging.containers[0].image` = STAGING pin. Advances independently during development.
 
-The two pins may legitimately differ. Each env block owns its own container application with its own version history.
-The pins describe what staging and prod each run, not a shared constraint.
-
-#### Default workflow: staging soak then promote
-
-Most image changes go through a staging-soak cycle before reaching production. This protects prod from any sandbox
-regression that only surfaces under real install traffic.
-
-**Image bump (feat PR to dev):**
+#### Image bump (feat PR to dev)
 
 ```bash
 # from a clean working tree on dev
@@ -308,131 +188,291 @@ GIT_SHA=$(git rev-parse --short HEAD)
 bun x wrangler containers build -p -t "anc-sandbox:$GIT_SHA" docker/sandbox/
 ```
 
-The command runs `docker build` locally and pushes to the CF registry, authenticated via `CLOUDFLARE_API_TOKEN`. Output
-ends with a `<git-sha>: digest: sha256:... size: ...` line confirming the push.
+Update **only `env.staging.containers[0].image`** in `wrangler.jsonc` with the new tag. Commit Dockerfile change +
+staging-pin update together. PR to `dev`.
+
+#### Promotion (release PR to main)
+
+Cut a `release/*` branch from `main`, cherry-pick the dev commits, then add one promotion commit bumping the top-level
+`containers[0].image` to match `env.staging.containers[0].image`. CI on a main-targeting PR enforces: both pins exist in
+the CF managed registry AND both pins point at the same tag.
+
+#### Lockstep-bump shortcut
+
+For low-risk image changes (security patch, dependency-only update with no behavior delta), update BOTH pins in the same
+feat PR.
+
+→ Soak-then-promote rationale, retention discipline, DO migration walls, GHA fallback:
+[`RELEASES-RATIONALE.md` § Sandbox image releases](./RELEASES-RATIONALE.md#sandbox-image-releases).
+
+#### R2 score-cache lifecycle
+
+Configure once per bucket (idempotent on the rule name):
+
+```bash
+bun x wrangler r2 bucket lifecycle add anc-score-cache scores-7day-ttl scores/ --expire-days 7 -y
+bun x wrangler r2 bucket lifecycle add anc-score-cache-staging scores-7day-ttl scores/ --expire-days 7 -y
+```
+
+Verify:
 
-Update **only `env.staging.containers[0].image`** in `wrangler.jsonc` with the new tag. Leave the top-level (prod) pin
-alone. Commit the Dockerfile change + the staging-pin update together. PR to `dev`.
+```bash
+bun x wrangler r2 bucket lifecycle list anc-score-cache
+bun x wrangler r2 bucket lifecycle list anc-score-cache-staging
+```
 
-CI on a dev-targeting PR verifies the new staging tag exists in the registry; the prod pin keeps pointing at the
-last-released tag, which also still exists (image-retention discipline). The CI guard accepts the divergence.
+Both buckets were configured on 2026-05-19. The `tests/wrangler-config.test.ts` drift-guard pins the exact literal
+command above.
 
-After merge to dev, CI deploys `agentnative-site-staging` to the new image. Soak: observability, integration tests, real
-traffic on the staging.workers.dev URL.
+## Live-scoring (v3) release procedure
 
-**Promotion (release PR to main):**
+The live-scoring stack adds a Worker route (`/api/score`), a `Sandbox` Durable Object, a Container image, two R2
+buckets, a KV namespace (`SCORE_KV`), and two rate-limit bindings to the static-site release. Static-site mechanics are
+unchanged.
 
-When the image is ready to ship, cut a release branch from `main` and cherry-pick the dev commits as usual. Add one
-promotion commit that bumps the top-level `containers[0].image` to match `env.staging.containers[0].image`. Open the PR
-to `main`. CI on a main-targeting PR enforces TWO invariants:
+→ Rationale and platform constraints:
+[`RELEASES-RATIONALE.md` § Sandbox image releases](./RELEASES-RATIONALE.md#sandbox-image-releases) and
+[§ DO migrations are one-way walls](./RELEASES-RATIONALE.md#do-migrations-are-one-way-walls).
 
-- both pins exist in the CF managed registry, AND
-- both pins point at the same tag (released state)
+### Image rebuild path
 
-Merge. CI deploys `agentnative-site` to the promoted image, and the site at `anc.dev` is now on the new sandbox.
+Image build and registry push happen inside the local `wrangler containers build -p` step (see § Sandbox image releases
+above). `wrangler deploy` does NOT rebuild. Pin format: `registry.cloudflare.com/<account-id>/anc-sandbox:<git-sha>`.
+Immutability is per-Worker-version via `wrangler rollback`, not via a `@sha256:` literal.
 
-#### Shortcut: lockstep bumps (low-risk changes only)
+### Migration v1: the rollback recipe
 
-For image changes that don't need a soak (base-image security patch, dependency-only update with no behavior delta),
-update BOTH pins in the same feat PR. The dev-targeting PR has equal pins from the start; the eventual release PR
-carries equal pins; staging and prod deploy the new image in lockstep.
+`migrations[].new_sqlite_classes: ["Sandbox"]` (tag `v1`) is a one-way gate; rationale lives in
+[`RELEASES-RATIONALE.md` § DO migrations are one-way walls](./RELEASES-RATIONALE.md#do-migrations-are-one-way-walls).
+The only path past `v1` is a follow-up migration:
 
-The CI guard accepts this because both pins exist in the registry on every PR.
+```jsonc
+// wrangler.jsonc
+"migrations": [
+  { "tag": "v1", "new_sqlite_classes": ["Sandbox"] },
+  { "tag": "v2-drop-sandbox", "deleted_classes": ["Sandbox"] }
+]
+```
 
-Use the soak-then-promote default for any change that touches sandbox behavior: package manager additions, runtime
-version bumps, `anc` upgrades, `cargo-binstall` upgrades, anything in `docker/sandbox/Dockerfile` past the base-image
-FROM line.
+Apply via a normal `wrangler deploy` against a Worker version that no longer references the `Sandbox` DO binding. The
+`SCORE_CACHE` R2 bucket, `SCORE_KV`, and rate-limit counters are untouched by the migration.
 
-#### Deploy never rebuilds
+### Cross-migration rollback rehearsal
 
-`wrangler deploy --env staging` (and `wrangler deploy` on main) against the fully-qualified registry URI does NOT
-trigger a rebuild. The image was already published during the local `wrangler containers build -p` step.
+Run on staging before opening the first `release/*` PR to main. The recipe assumes a one-time discovery: the **container
+application binding to the deleted DO namespace is the gotcha**, and a `wrangler containers delete` step between v2 and
+v3 is mandatory. Without it, v3-restore-sandbox uploads the Worker version cleanly but the container-app half of
+`wrangler deploy` refuses with `There is already an application with the name <...> deployed that is associated with a
+different durable object namespace`.
 
-#### Image-retention discipline
+```bash
+# 1. Confirm migration v1 is live on staging.
+bun x wrangler deployments list --env staging | head -20
+
+# 2. Verify /api/score works against a curated slug.
+curl -fSsL -H "Content-Type: application/json" \
+  -H "CF-Access-Client-Id: ${CF_ACCESS_CLIENT_ID}" \
+  -H "CF-Access-Client-Secret: ${CF_ACCESS_CLIENT_SECRET}" \
+  -d '{"input":"ripgrep","turnstile_token":"x"}' \
+  https://agentnative-site-staging.brettdavies.workers.dev/api/score \
+  | jq '.scorecard.kind, .spec_version'
+
+# 3. Apply the follow-up migration on a throwaway branch.
+#    Edit wrangler.jsonc to add the v2-drop-sandbox migration AND
+#    remove the Sandbox DO binding AND the containers[] block under env.staging
+#    (containers reference Sandbox class_name and will fail validation if the
+#    class no longer exists), then:
+bun x wrangler deploy --env staging
+
+# 4. Verify /api/score now bounces cleanly and non-sandbox routes still serve.
+#    Inputs that match the registry-fast-path still return 200 (no DO call).
+#    Inputs that force the DO-invocation path return CF error 1101 today (no
+#    handler guard); the upcoming fix returns a clean 503 sandbox_unavailable.
+curl -i https://agentnative-site-staging.brettdavies.workers.dev/api/score \
+  -H "Content-Type: application/json" -d '{}' | head -20
+curl -fSsL https://agentnative-site-staging.brettdavies.workers.dev/ | head -5
+curl -fSsL https://agentnative-site-staging.brettdavies.workers.dev/scorecards | head -5
+
+# 5a. MANDATORY before v3 — delete the staging container application. Its
+#     internal binding to the old (now-deleted) DO namespace blocks the v3
+#     deploy. List first to capture the ID, then delete.
+bun x wrangler containers list
+bun x wrangler containers delete <staging-container-app-id>
+
+# 5b. Restore the Sandbox DO with a NEW migration tag (cannot reuse v1).
+#     Edit wrangler.jsonc to add a v3-restore-sandbox migration with
+#     new_sqlite_classes: ["Sandbox"] and re-add the binding + containers, then:
+bun x wrangler deploy --env staging
+
+# 6. Confirm /api/score works again (step 2 repeated). The DO-invocation path
+#    (e.g., a real GitHub URL not in the registry) now reaches the sandbox and
+#    returns a real scorecard or a clean handler error, not CF 1101.
+```
 
-NEVER delete a tag from the CF managed registry that backed a shipped Worker version. Deletion silently breaks `wrangler
-rollback` for any version that referenced the image, per
-[Containers Limits](https://developers.cloudflare.com/containers/platform-details/limits/). The 50 GB account-wide cap
-is a quarterly prune review, not a routine cleanup. When a release tag ships, record the pair `<git-tag> <-> <registry
-URI>` in the release commit body so the inventory survives.
+After the rehearsal, dev's `env.staging.migrations` MUST include all three tags (`v1`, `v2-drop-sandbox`,
+`v3-restore-sandbox`) because Cloudflare DO migrations are append-only — staging will reject future deploys whose
+migration list is a subset of what's already applied. Top-level `migrations` (production) stays at `v1` until prod runs
+its own rollback.
 
-Retention is what makes soak-then-promote safe: while a new image is soaking on staging, the prod pin still references
-the previous release's tag, and that tag must remain in the registry for prod to keep serving.
+#### Rehearsal evidence
 
-#### DO migrations are one-way walls
+| Step | Date       | Staging deploy ID                      | Container app / DO namespace                                                                       | Notes                                                                                                                                                                  |
+| ---- | ---------- | -------------------------------------- | -------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| 1    | 2026-05-24 | `c626ddad-6ac2-4f81-a9ec-4a7a6dd926ec` | container `a0309fd2-9622-4dd8-a6a8-faf95292f08e` / DO namespace `a4fb92ed020241cb802c1d5176a39608` | v1 baseline. `env.SCORE (Sandbox)` + all live-scoring bindings present.                                                                                                |
+| 2    | 2026-05-24 | (no deploy)                            | n/a                                                                                                | `/api/score` ripgrep returned 200; triad complete (`spec_version: 0.4.0`, `site_spec_version: 0.4.0`, `anc_version: 0.3.0`, `checker_url`).                            |
+| 3    | 2026-05-24 | `25107ae7-6727-4ea9-90ff-75996cba8cdc` | container unchanged / DO namespace dropped                                                         | v2-drop-sandbox applied. Worker bindings list no longer shows `env.SCORE`.                                                                                             |
+| 4    | 2026-05-24 | (no deploy)                            | n/a                                                                                                | `-d '{}'` → 400 clean (`unrecognized_input`). `/` + `/scorecards` → 200. DO-forcing input (`xplr`) → CF 1101 (handler guard gap captured).                             |
+| 5a   | 2026-05-24 | (no deploy)                            | container `a0309fd2-...` deleted                                                                   | `wrangler containers delete` required — first v3 deploy attempt failed with the "different durable object namespace" error.                                            |
+| 5b   | 2026-05-24 | `d88ef7f1-5cd0-4469-a0be-4d8c08d35800` | container `a03d7221-b4ed-4aad-b534-41d7c34461da` / DO namespace `50c9a04e3d4649268d8e8957572e0cd0` | v3-restore-sandbox applied. `env.SCORE (Sandbox)` back; container app recreated fresh at `instances: 0, max_instances: 10`.                                            |
+| 6    | 2026-05-24 | (no deploy)                            | n/a                                                                                                | `/api/score` ripgrep → 200 with full triad. DO-forcing input (`xplr`) → 502 `chain_resolved_install_failed` (sandbox ran, real install bug — outside rehearsal scope). |
 
-The first Worker version that applied `migrations[].new_sqlite_classes: ["Sandbox"]` (`v1`) cannot be rolled back across
-that boundary via `wrangler rollback`, per
-[Versions and deployments / Rollbacks](https://developers.cloudflare.com/workers/configuration/versions-and-deployments/rollbacks/).
-Treat DO-migration commits as milestone releases that get an explicit reviewer note.
+### Sandbox image promotion
 
-#### GHA fallback
+Standard staging-leads-prod soak (see § Sandbox image releases above). Two pins in `wrangler.jsonc`:
+`env.staging.containers[0].image` advances first; `containers[0].image` (top-level production pin) advances on the
+release PR. Lockstep-bump shortcut for low-risk image changes only. CI on a main-targeting PR enforces both pins exist
+in the CF managed registry AND both pins point at the same tag.
 
-If a local build is impossible, set `image:` to a Dockerfile path (`./docker/sandbox/Dockerfile`) and let
-`cloudflare/wrangler-action` build inline on `ubuntu-latest` (~60-130s cold per deploy; no GHA-side layer cache; push is
-auto-skipped when the existing tag still matches). This is a fallback, not the primary path; the local-build-once flow
-above is what the deploy workflow assumes.
+### Post-deploy smoke
 
-## CI
+`.github/workflows/deploy.yml` runs a smoke step against staging after every successful staging deploy. POSTs to
+`/api/score` for the `ripgrep` slug with the CF Access service-token headers and a Turnstile test token; asserts the
+response triad (`spec_version`, `site_spec_version`, `anc_version`, `checker_url`) plus `scorecard.kind ===
+"registry_hit"`. Fails the deploy on a missing field. No production smoke step runs until U10 promotes live scoring to
+anc.dev.
 
-Two workflows gate pull requests:
+→ Scope rationale (why the smoke covers only the registry-fast-path):
+[`RELEASES-RATIONALE.md` § Post-deploy smoke scope](./RELEASES-RATIONALE.md#post-deploy-smoke-scope).
+
+### Cost-watch hand-off
+
+Operator telemetry queries, error-tier breakdowns, cache hit-rate watchpoints, and the kill-switch flip procedure live
+in the live-scoring monitoring runbook:
+
+- [`docs/runbooks/live-scoring-monitoring.md`](docs/runbooks/live-scoring-monitoring.md).
+
+The queryable counterpart with canonical Analytics Engine SQL lives in the analytics runbook:
 
-| Workflow      | Fires on                                           | Purpose                                                                                                        |
-| ------------- | -------------------------------------------------- | -------------------------------------------------------------------------------------------------------------- |
-| `ci.yml`      | PR with any change outside `docs/**` / root `*.md` | Heavy pipeline: `bun install → lint → build → test → wrangler --dry-run`. ~30s warm.                           |
-| `ci-stub.yml` | PR that touches only `docs/**` or root `*.md`      | No-op stub. Emits the required check name to satisfy the ruleset gate without running the heavy pipeline. ~5s. |
+- [`docs/runbooks/live-scoring-analytics.md`](docs/runbooks/live-scoring-analytics.md).
 
-Both jobs are named `lint · build · test · wrangler` — the same context the dev/main rulesets require. On a PR that
-mixes docs and code, both workflows fire and both pass; the required-check gate is satisfied either way.
+### Cost guardrails
 
-### Why the stub
+Four-layer cost-cap stance, ordered by speed-to-act:
 
-Required status checks + `paths-ignore` is a known
-[GitHub Actions sharp edge](https://docs.github.com/en/actions/using-jobs/using-conditions-to-control-job-execution): if
-the workflow is filtered out for a given PR, the required check shows as "Expected" forever and the PR can't merge. The
-stub workflow fires exactly when `ci.yml` is filtered, emits the same check context as a no-op success, and unblocks the
-PR.
+1. **Implicit per-request limits.** `SCORE_LIMITER` (10 req/min/session) and `SCORE_LIMITER_IP` (30 req/min/IP) are the
+   per-user cost ceilings. Live since U5. Analytics Engine confirms whether they are effective on the registry-hit-rate
+   query in the analytics runbook (high registry-hit-rate means most traffic is unmetered; low rate means the limiters
+   are the load-bearing ceiling).
+2. **Manual kill switch.** Flip via `wrangler kv key put`:
 
-### The invariant
+   ```bash
+   # Staging
+   wrangler kv key put --binding=SCORE_KV --env staging scoring_disabled true
 
-`ci.yml`'s `paths-ignore:` list and `ci-stub.yml`'s `paths:` list must stay identical. Drift creates gaps (no workflow
-fires → required check never reports → PR stuck) or benign double-runs on mixed PRs. A comment in both files calls this
-out explicitly; keep them in sync when editing either one.
+   # Production
+   wrangler kv key put --binding=SCORE_KV scoring_disabled true
+   ```
 
-### Visual-fidelity gates
+   Recovery time is bounded by the in-isolate cache TTL (30 s) and the KV global propagation (≤60 s). Reset by deleting
+   the key:
 
-Beyond the workflow checks above, two visual-regression rules apply to any change touching CSS, layout, or rendered
-output: a "browser-verify before done" agent-side rule (working today) and a Playwright snapshot diff in CI (planned,
-deferred until the design system stabilizes). Both live in [`AGENTS.md` § Visual fidelity](./AGENTS.md#visual-fidelity)
-— that's the source of truth. A release that didn't satisfy those gates upstream isn't unblocked by this pipeline being
-green.
+   ```bash
+   wrangler kv key delete --binding=SCORE_KV --env staging scoring_disabled
+   ```
+
+   The operator-facing playbook lives in the monitoring runbook; this section names the procedure.
+3. **Email-only Cloudflare Budget Alerts** at $5, $25, and $100 thresholds. Cloudflare has no native cost auto-cap; the
+   billing dashboard is read-only at the API layer. Configure in the Cloudflare dashboard under Billing → Notifications.
+   Add three separate Budget Alerts (one per threshold) with the operator email on the destination list; each alert
+   fires once per billing cycle when the rolling charge crosses its threshold; setup is one-time per account and the
+   alerts persist across deploys. Confirm the wiring with a test alert at a low threshold ($1) before relying on the
+   production thresholds.
+4. **Automated kill switch via cron.** DEFERRED to U10.1. Concept: a scheduled Worker queries the Analytics Engine
+   dataset for rolling 24h request count and flips `scoring_disabled` past a threshold. Threshold tuning needs real
+   traffic data; the pieces (dataset + kill switch + Workers cron primitive) all exist, but wiring them is its own
+   discrete change. Do not speculate.
+
+#### Analytics Engine datasets
+
+Two distinct datasets keep staging traffic out of production aggregates:
+
+| Environment | Binding           | Dataset                  |
+| ----------- | ----------------- | ------------------------ |
+| Production  | `SCORE_TELEMETRY` | `anc_live_score_prod`    |
+| Staging     | `SCORE_TELEMETRY` | `anc_live_score_staging` |
+
+Datasets are created on first write; no `wrangler analytics-engine create` step needed. Confirm in the Cloudflare
+dashboard under Workers → Analytics Engine after the first post-deploy request.
+
+Sample query (paste into the dashboard's AE SQL editor, replace dataset name per environment):
+
+```sql
+SELECT blob2 AS pm, COUNT() AS requests
+FROM anc_live_score_staging
+WHERE timestamp > NOW() - INTERVAL '24' HOUR
+GROUP BY pm
+ORDER BY requests DESC
+FORMAT JSONCompact
+```
+
+The full canonical query playbook (daily volume, p50/p99 latency, error distribution, registry-hit-rate, top tools)
+lives in [`docs/runbooks/live-scoring-analytics.md`](docs/runbooks/live-scoring-analytics.md).
+
+## Staging access (Cloudflare Access)
+
+Staging Worker gated by CF Access. Browser: SSO/email-OTP at `https://agentnative-site-staging.brettdavies.workers.dev`
+(90-day session). CLI: `CF-Access-Client-Id` + `CF-Access-Client-Secret` headers from 1Password (see
+`scripts/staging-cache-smoke.sh` for the item lookup convention).
+
+Bootstrap (idempotent):
+
+```bash
+CF_ACCOUNT_ID=<account-id> ./scripts/cf-access-bootstrap.sh
+```
+
+Inventory + rotation playbook + dashboard permission-group gotcha:
+[`docs/solutions/tooling-decisions/cloudflare-access-staging-worker-2026-05-19.md`](docs/solutions/tooling-decisions/cloudflare-access-staging-worker-2026-05-19.md).
+
+## CI
+
+Two workflows gate pull requests:
+
+| Workflow      | Fires on                                           | Purpose                                                                              |
+| ------------- | -------------------------------------------------- | ------------------------------------------------------------------------------------ |
+| `ci.yml`      | PR with any change outside `docs/**` / root `*.md` | Heavy pipeline: `bun install → lint → build → test → wrangler --dry-run`. ~30s warm. |
+| `ci-stub.yml` | PR that touches only `docs/**` or root `*.md`      | No-op stub. Emits the required check name without running the heavy pipeline. ~5s.   |
+
+Both jobs are named `lint · build · test · wrangler`.
+
+`ci.yml`'s `paths-ignore:` list and `ci-stub.yml`'s `paths:` list must stay identical.
+
+→ Rationale + status-check context pitfall:
+[`RELEASES-RATIONALE.md` § CI workflow split](./RELEASES-RATIONALE.md#ci-workflow-split).
 
 ## Secrets
 
-Stored as GitHub Actions secrets on `brettdavies/agentnative-site`. Accessible to workflows via `${{ secrets.<name> }}`.
+GitHub Actions secrets on `brettdavies/agentnative-site`:
 
-| Secret          | Purpose                                                                                                                                                                         | Rotation                                                    |
-| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------- |
-| `CF_API_TOKEN`  | Cloudflare API token with `Workers Scripts:Edit` + `Account:Read`. Used by `wrangler-action` to deploy.                                                                         | Max 1 year; renew before expiry.                            |
-| `CF_ACCOUNT_ID` | Cloudflare account ID. Not a formal secret, but kept out of the public repo; surfaces to wrangler via `CLOUDFLARE_ACCOUNT_ID` env (passed to `wrangler-action` as `accountId`). | Effectively never — changes only if the CF account changes. |
+| Secret          | Purpose                                                                                                 | Rotation                                |
+| --------------- | ------------------------------------------------------------------------------------------------------- | --------------------------------------- |
+| `CF_API_TOKEN`  | Cloudflare API token with `Workers Scripts:Edit` + `Account:Read`. Used by `wrangler-action` to deploy. | Max 1 year; renew before expiry.        |
+| `CF_ACCOUNT_ID` | Cloudflare account ID. Surfaces to wrangler via `CLOUDFLARE_ACCOUNT_ID` env.                            | Changes only if the CF account changes. |
 
-`GITHUB_TOKEN` is provided automatically by GitHub Actions; no setup needed.
+`GITHUB_TOKEN` is provided by GitHub Actions automatically.
 
-Secrets are also mirrored in 1Password (`secrets-dev` vault) for disaster-recovery and cross-device use.
+Secrets are also mirrored in 1Password for disaster recovery and cross-device use.
 
 ## Branch protection
 
-Two rulesets are committed under `.github/rulesets/` and applied to the repo via the GitHub API:
+Rulesets committed under `.github/rulesets/`, applied to the repo via the GitHub API:
 
-- `protect-main.json` — required signatures, linear history, squash-only merges via PR, required status checks (`ci`,
+- `protect-main.json`: required signatures, linear history, squash-only merges via PR, required status checks (`ci`,
   `guard-docs`, `guard-release-branch`), creation/deletion blocked, non-fast-forward blocked.
-- `protect-dev.json` — required signatures, deletion blocked, non-fast-forward blocked. No PR-requirement at the ruleset
-  level; the PR-only norm is enforced by convention + the `guard-release-branch` check on the main side.
+- `protect-dev.json`: required signatures, deletion blocked, non-fast-forward blocked. PR-only norm is convention +
+  `guard-release-branch` on the main side.
 
 ### Applying changes
 
-Edit the JSON locally, then sync to the remote:
-
 ```bash
 # First apply (creating a ruleset):
 gh api -X POST repos/brettdavies/agentnative-site/rulesets \
@@ -443,67 +483,45 @@ gh api -X PUT repos/brettdavies/agentnative-site/rulesets/<id> \
   --input .github/rulesets/protect-main.json
 ```
 
-Committing the JSON alongside the code means ruleset changes land via the same review process as workflow changes — a
-`chore(ci): tighten protect-main` release goes through dev → release/* → main like anything else.
-
-### Status-check context pitfall
-
-The `required_status_checks[].context` strings in `protect-main.json` must match exactly what GitHub publishes for each
-check:
-
-- **Inline job** (with `name:` field): published as just `<job-name>` (no workflow-name prefix).
-- **Reusable-workflow caller** (`uses: .../foo.yml@ref`): published as `<caller-job-id> / <reusable-job-id-or-name>`.
-
-Mixing these produces a stuck-but-green PR: all actual checks report green, but the ruleset waits forever on a context
-that will never appear. Confirm the real contexts after a first CI run with:
-
-```bash
-gh api repos/brettdavies/agentnative-site/commits/<sha>/check-runs --jq '.check_runs[].name'
-```
+→ Status-check context strings (inline vs reusable):
+[`RELEASES-RATIONALE.md` § Status-check context strings](./RELEASES-RATIONALE.md#status-check-context-strings).
 
 ## Skill releases
 
 `/skill.json` and `/skill` advertise the `agent-native-cli` skill, hosted at
-[`brettdavies/agentnative-skill`](https://github.com/brettdavies/agentnative-skill). This site vendors the skill's
-manifest (per-host install commands, version, surface metadata) in `src/data/skill.json`; the skill repo holds the
-actual content. Surface contract in `DESIGN.md` §3.9. Update detection at install sites is delegated to the skill
-bundle's `bin/check-update`, which compares the local bundle's `VERSION` against `main` on GitHub.
+[`brettdavies/agentnative-skill`](https://github.com/brettdavies/agentnative-skill). Site vendors the manifest in
+`src/data/skill.json`; the skill repo holds the actual content.
 
-The skill repo's branch model: `main` is the published-release pointer (default branch); `dev` is the integration
-branch. The bare `git clone --depth 1` in each install command lands on `main` — so each release requires the skill
-maintainer to fast-forward `main` to the new tag.
+### Release procedure
 
-### Skill-release procedure
+1. **Cut the skill release** (in `agentnative-skill`): edit, commit, tag `v0.x.y`, push `dev --follow-tags`.
+   Fast-forward `main` to the new tag and push:
+
+   ```bash
+   git checkout main && git merge --ff-only v0.x.y && git push origin main
+   ```
 
-1. **Cut the skill release** (in `agentnative-skill`): edit, commit, tag `v0.x.y` (signed if a key is configured), then
-   `git push origin dev --follow-tags`. Fast-forward `main` to the new tag and push: `git checkout main && git merge
-   --ff-only v0.x.y && git push origin main`. The site's bare `git clone --depth 1` lands on `main`, so the fast-forward
-   is what makes the new release reachable.
 2. **Bump the manifest in this repo (only when user-facing fields changed)**: edit `src/data/skill.json` to bump
-   `version` and update any per-host install commands, description, or other surface fields the release modified. If
-   nothing user-facing changed, skip the manifest bump entirely — the skill bundle's `bin/check-update` is what tells
-   installed users a new release exists.
-3. **PR to `dev`**: CI runs the unit + worker tests on the bumped manifest. Squash-merge on green.
+   `version` and update any per-host install commands, description, or other surface fields.
+3. **PR to `dev`**: CI runs unit + worker tests on the bumped manifest. Squash-merge on green.
 4. **Release `dev` → `main`** via the standard `release/*` flow above. Site deploys to `anc.dev`.
-5. **Cache-purge** `/skill`, `/skill.json`, and `/skill.md` via the Cloudflare cache-purge API after a manifest bump, so
-   users don't pick up the old shape from the 24h `s-maxage` window. Use the API token stored in 1Password
-   (`secrets-dev` vault, `Cloudflare API Token - Wrangler (bigdaddy)`). First-deploy-after-rename note (cutover from
-   `/install*` → `/skill*`): also purge `/install`, `/install.json`, and `/install.md` once to evict any cached skill
-   content under the old paths. Skip this on subsequent deploys.
-6. **Verify the deployed manifest**: `curl -s https://anc.dev/skill.json | jq -r .version` matches the new version. The
-   Playwright `skill` project (`bun x playwright test --project=skill`) re-runs the live 4-host clone against the
-   advertised hosts; run it locally before tagging if anything in the manifest's host commands changed.
+5. **Cache-purge** `/skill`, `/skill.json`, `/skill.md` via the Cloudflare cache-purge API (token in 1Password).
+   First-deploy-after-rename: also purge `/install`, `/install.json`, `/install.md` once.
+6. **Verify**: `curl -s https://anc.dev/skill.json | jq -r .version` matches the new version. Run the Playwright `skill`
+   project (`bun x playwright test --project=skill`) against the live host.
+
+→ Rationale: [`RELEASES-RATIONALE.md` § Skill releases](./RELEASES-RATIONALE.md#skill-releases).
 
 ### Skill-availability probe
 
 `.github/workflows/skill-availability.yml` runs `git ls-remote --exit-code
-https://github.com/brettdavies/agentnative-skill.git HEAD` daily at 13:00 UTC and on `workflow_dispatch`. It catches
-visibility regressions between releases (repo deletion, accidental flip back to private, branch rename). The probe runs
-over unauthenticated HTTPS; failures show up in the Actions tab and email the run owner. After the cutover that flips
-the skill repo public, run `gh workflow run skill-availability.yml` once to seed a green run on the schedule.
+https://github.com/brettdavies/agentnative-skill.git HEAD` daily at 13:00 UTC and on `workflow_dispatch`. Catches
+visibility regressions (repo deletion, accidental flip back to private, branch rename). After flipping the skill repo
+public, run `gh workflow run skill-availability.yml` once to seed a green run on the schedule.
 
 ## Related docs
 
-- [`AGENT.md`](./AGENT.md) — onboarding, repo conventions, tool-site sequencing
-- [`DESIGN.md`](./DESIGN.md) — design system and build contract
-- [`docs/TODOS.md`](./docs/TODOS.md) — deferred work (not in v0 scope)
+- [`RELEASES-RATIONALE.md`](./RELEASES-RATIONALE.md): release flow rationale, CI design, status-check pitfalls
+- [`AGENT.md`](./AGENT.md): onboarding, repo conventions, tool-site sequencing
+- [`DESIGN.md`](./DESIGN.md): design system and build contract
+- [`docs/TODOS.md`](./docs/TODOS.md): deferred work (not in v0 scope)
diff --git a/bun.lock b/bun.lock
index 82e5b27..4a61638 100644
--- a/bun.lock
+++ b/bun.lock
@@ -5,6 +5,7 @@
     "": {
       "name": "agentnative-site",
       "dependencies": {
+        "@cloudflare/sandbox": "0.9.2",
         "@shikijs/rehype": "^4.0.2",
         "accepts": "^1.3.8",
         "badge-maker": "^5.0.2",
@@ -54,8 +55,12 @@
 
     "@biomejs/cli-win32-x64": ["@biomejs/cli-win32-x64@2.4.10", "", { "os": "win32", "cpu": "x64" }, "sha512-aW/JU5GuyH4uxMrNYpoC2kjaHlyJGLgIa3XkhPEZI0uKhZhJZU8BuEyJmvgzSPQNGozBwWjC972RaNdcJ9KyJg=="],
 
+    "@cloudflare/containers": ["@cloudflare/containers@0.3.3", "", {}, "sha512-ZSXmArCoo5bVTp8pGAJdl5WKmwtZDcffJqr4JcZEbSmMIFjU+AlBqgysuxXMgu03Rp239cOdqerbjK7H0K2krQ=="],
+
     "@cloudflare/kv-asset-handler": ["@cloudflare/kv-asset-handler@0.4.2", "", {}, "sha512-SIOD2DxrRRwQ+jgzlXCqoEFiKOFqaPjhnNTGKXSRLvp1HiOvapLaFG2kEr9dYQTYe8rKrd9uvDUzmAITeNyaHQ=="],
 
+    "@cloudflare/sandbox": ["@cloudflare/sandbox@0.9.2", "", { "dependencies": { "@cloudflare/containers": "^0.3.0", "aws4fetch": "^1.0.20", "capnweb": "^0.6.1", "hono": "^4.7.11" }, "peerDependencies": { "@openai/agents": "^0.3.3", "@opencode-ai/sdk": "^1.1.40", "@xterm/xterm": ">=5.0.0" }, "optionalPeers": ["@openai/agents", "@opencode-ai/sdk", "@xterm/xterm"] }, "sha512-rQJVG79kxrU87yExXMw4edEzZfe0geYTAIqaFabAzpnAxtnMHTLswxB/hZ3hKLBE27BEUB/FXSdkBllrn6TnDQ=="],
+
     "@cloudflare/unenv-preset": ["@cloudflare/unenv-preset@2.16.0", "", { "peerDependencies": { "unenv": "2.0.0-rc.24", "workerd": "1.20260301.1 || ~1.20260302.1 || ~1.20260303.1 || ~1.20260304.1 || >1.20260305.0 <2.0.0-0" }, "optionalPeers": ["workerd"] }, "sha512-8ovsRpwzPoEqPUzoErAYVv8l3FMZNeBVQfJTvtzP4AgLSRGZISRfuChFxHWUQd3n6cnrwkuTGxT+2cGo8EsyYg=="],
 
     "@cloudflare/workerd-darwin-64": ["@cloudflare/workerd-darwin-64@1.20260405.1", "", { "os": "darwin", "cpu": "x64" }, "sha512-EbmdBcmeIGogKG4V1odSWQe7z4rHssUD4iaXv0cXA22/MFrzH3iQT0R+FJFyhucGtih/9B9E+6j0QbSQD8xT3w=="],
@@ -294,6 +299,8 @@
 
     "ast-types": ["ast-types@0.13.4", "", { "dependencies": { "tslib": "^2.0.1" } }, "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w=="],
 
+    "aws4fetch": ["aws4fetch@1.0.20", "", {}, "sha512-/djoAN709iY65ETD6LKCtyyEI04XIBP5xVvfmNxsEP0uJB5tyaGBztSryRr4HqMStr9R06PisQE7m9zDTXKu6g=="],
+
     "axe-core": ["axe-core@4.11.2", "", {}, "sha512-byD6KPdvo72y/wj2T/4zGEvvlis+PsZsn/yPS3pEO+sFpcrqRpX/TJCxvVaEsNeMrfQbCr7w163YqoD9IYwHXw=="],
 
     "axe-html-reporter": ["axe-html-reporter@2.2.11", "", { "dependencies": { "mustache": "^4.0.1" }, "peerDependencies": { "axe-core": ">=3" } }, "sha512-WlF+xlNVgNVWiM6IdVrsh+N0Cw7qupe5HT9N6Uyi+aN7f6SSi92RDomiP1noW8OWIV85V6x404m5oKMeqRV3tQ=="],
@@ -344,6 +351,8 @@
 
     "camelcase": ["camelcase@5.3.1", "", {}, "sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg=="],
 
+    "capnweb": ["capnweb@0.6.1", "", {}, "sha512-fmhV26QPd1ewf5R74h55oVZnGwIcSaRMzbfLQUy8+zOBjuTmT3KXoT8wxHvnp1m9Ht9BoUUS5ZwNLoVLfQTyBg=="],
+
     "ccount": ["ccount@2.0.1", "", {}, "sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg=="],
 
     "chalk": ["chalk@2.4.2", "", { "dependencies": { "ansi-styles": "^3.2.1", "escape-string-regexp": "^1.0.5", "supports-color": "^5.3.0" } }, "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ=="],
@@ -546,6 +555,8 @@
 
     "hast-util-whitespace": ["hast-util-whitespace@3.0.0", "", { "dependencies": { "@types/hast": "^3.0.0" } }, "sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw=="],
 
+    "hono": ["hono@4.12.18", "", {}, "sha512-RWzP96k/yv0PQfyXnWjs6zot20TqfpfsNXhOnev8d1InAxubW93L11/oNUc3tQqn2G0bSdAOBpX+2uDFHV7kdQ=="],
+
     "html-void-elements": ["html-void-elements@3.0.0", "", {}, "sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg=="],
 
     "http-errors": ["http-errors@2.0.1", "", { "dependencies": { "depd": "~2.0.0", "inherits": "~2.0.4", "setprototypeof": "~1.2.0", "statuses": "~2.0.2", "toidentifier": "~1.0.1" } }, "sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ=="],
diff --git a/bunfig.toml b/bunfig.toml
new file mode 100644
index 0000000..c81e762
--- /dev/null
+++ b/bunfig.toml
@@ -0,0 +1,13 @@
+# Bun test runtime config.
+#
+# preload runs before every `bun test` invocation. tests/bun-setup.ts
+# registers a virtual-module shim for `cloudflare:workers`, which
+# `@cloudflare/containers` (transitive dep of `@cloudflare/sandbox`)
+# imports at module load. Without the shim every test that transitively
+# pulls src/worker/score/do.ts (via the Worker entry export) throws
+# `Cannot find package 'cloudflare:workers'` at import time.
+#
+# Rationale lives in tests/bun-setup.ts.
+
+[test]
+preload = ["./tests/bun-setup.ts"]
diff --git a/content/_intro.md b/content/_intro.md
index c1fbf8c..0959670 100644
--- a/content/_intro.md
+++ b/content/_intro.md
@@ -1,16 +1,16 @@
 # The agent-native CLI standard
 
 CLI tools are how AI agents touch everything else. Compilers, databases, git, the cloud, the shell. An agent asked to
-ship code, rotate a credential, grep a log, or deploy a branch frequently shells out to a binary — it's the
+ship code, rotate a credential, grep a log, or deploy a branch frequently shells out to a binary. It's the
 lowest-common-denominator interface where APIs don't exist or don't compose. The agent reads the output, decides what
 went right or wrong, and picks the next move. There is no human between the request and the process. The CLI either
 makes that loop tractable or it does not.
 
 This is the specification for CLIs that make it tractable. Eight principles, each expressing a requirement with
-[RFC 2119](https://www.rfc-editor.org/rfc/rfc2119) tiers — **MUST** for the contract, **SHOULD** for the default,
-**MAY** for the optional affordance. The companion linter, [`anc`](/check), scores any CLI against them and reports
-results by stable check ID (`p1-non-interactive`, `p2-json-output`, `p6-sigpipe`, …). Cite a principle by its anchor
-slug (`#p1-non-interactive-by-default` through `#p8-discoverable-skill-bundle`) — those are permanent.
+[RFC 2119](https://www.rfc-editor.org/rfc/rfc2119) tiers: **MUST** for the contract, **SHOULD** for the default, **MAY**
+for the optional affordance. The companion linter, [`anc`](/check), scores any CLI against them and reports results by
+stable check ID (`p1-non-interactive`, `p2-json-output`, `p6-sigpipe`, …). Cite a principle by its anchor slug
+(`#p1-non-interactive-by-default` through `#p8-discoverable-skill-bundle`); those are permanent.
 
 Each of the eight principles below has its own page (`/p1` through `/p8`) for deep-linking, and the same text is
 available as raw markdown at `/p1.md` … `/p8.md` for agent consumption. The entire spec as one file lives at
diff --git a/content/about.md b/content/about.md
index 371982a..87acf51 100644
--- a/content/about.md
+++ b/content/about.md
@@ -7,7 +7,7 @@ Eight principles, enforced by RFC 2119 requirement tiers (MUST / SHOULD / MAY),
 ## Provenance
 
 This spec is authored and maintained in the open by Brett Davies, with contributions accepted via the channels below. It
-is a proposal pressure-tested in public, not a ratified industry standard — the goal is to converge on something worth
+is a proposal pressure-tested in public, not a ratified industry standard. The goal is to converge on something worth
 ratifying, by writing it down concretely first and inviting people to break it.
 
 ## Prior art
@@ -16,23 +16,23 @@ The eight-principle structure draws on two distinct lineages.
 
 **Standards and methodologies that shaped the format:**
 
-- [Command Line Interface Guidelines (clig.dev)](https://clig.dev/) — the closest direct prior art for CLI design
+- [Command Line Interface Guidelines (clig.dev)](https://clig.dev/): the closest direct prior art for CLI design
   guidance; the Unix-philosophy distillation this spec departs from when agents change the human-only assumptions.
-- [The Twelve-Factor App (12factor.net)](https://12factor.net/) — the numbered-principle methodology layout,
-  environment- first configuration, and the discipline of writing each factor down concretely.
-- [IETF RFC 2119](https://www.rfc-editor.org/rfc/rfc2119) — the MUST / SHOULD / MAY contract that turns prose into a
+- [The Twelve-Factor App (12factor.net)](https://12factor.net/): the numbered-principle layout, environment-first
+  configuration, and the discipline of writing each factor down concretely.
+- [IETF RFC 2119](https://www.rfc-editor.org/rfc/rfc2119): the MUST / SHOULD / MAY contract that turns prose into a
   conformance bar.
 
 **Writing that informed the principles directly:**
 
 - Cloudflare's [Building a CLI for all of Cloudflare](https://blog.cloudflare.com/cf-cli-local-explorer) (2026-04-13)
-  and the [HN discussion that followed](https://news.ycombinator.com/item?id=47753689) — the most concrete public
-  statement of "agents are the primary customer" of CLI tools, paired with crowd-sourced failure modes from people who
+  and the [HN discussion that followed](https://news.ycombinator.com/item?id=47753689): the most concrete public
+  statement of "agents are the primary customer" of CLI tools, paired with crowdsourced failure modes from people who
   run agents against CLIs every day. Quotes from that thread shaped the framing of P3 (progressive help), P4 (actionable
-  errors), and P6 (composable structure) directly. The Cloudflare team's own rules (`get` not `info`, `--json` always,
-  `--force` not `--skip-confirmations`) are mirrored in P2 and P6.
+  errors), and P6 (composable structure) directly. P2 and P6 mirror the Cloudflare team's own rules (`get` not `info`,
+  `--json` always, `--force` not `--skip-confirmations`).
 
-If a specific principle's framing seems to echo prior writing, it probably does — credit accrues to the people whose
+If a specific principle's framing seems to echo prior writing, it probably does. Credit accrues to the people whose
 public reasoning informed it; mistakes are mine.
 
 ## Versioning
@@ -43,7 +43,7 @@ appears in the footer of every page.
 
 Principle anchor slugs (`#p1-non-interactive-by-default` through `#p8-discoverable-skill-bundle`) are permanent. If a
 principle merges or splits in a future MAJOR version, the old slug will resolve as a permanent redirect to wherever the
-requirement now lives — citations made today will not 404 after a future restructuring.
+requirement now lives; citations made today will not 404 after a future restructuring.
 
 ## RFC 2119
 
@@ -60,15 +60,19 @@ dual-licensed under MIT and Apache-2.0; see [its LICENSE files](https://github.c
 
 Pressure-testing is how the spec evolves. Three ways to contribute:
 
-1. **[Grade a real CLI](https://github.com/brettdavies/agentnative/issues/new?template=grade-a-cli.yml)** against a
-   principle you think the spec gets wrong. Name the CLI, the principle, and the specific MUST/SHOULD/MAY that failed
-   (or passed unexpectedly).
+1. **[Submit a grading finding](https://github.com/brettdavies/agentnative/issues/new?template=grading-finding.yml):**
+   score a real CLI against a principle you think the spec gets wrong, and report what you found. Name the CLI, the
+   principle, and the specific MUST/SHOULD/MAY that failed (or passed unexpectedly).
 2.
 
-**[Report a false positive or false negative](https://github.com/brettdavies/agentnative-cli/issues/new?template=false-positive.yml)**
+**[Report a false positive or false negative](https://github.com/brettdavies/agentnative-cli/issues/new?template=false-positive.yml):**
 in the `anc` checker. Include the command, the output, and the check ID.
-3. **[Propose a principle edit](https://github.com/brettdavies/agentnative/issues/new?template=pressure-test.yml)** —
+3. **[Propose a principle edit](https://github.com/brettdavies/agentnative/issues/new?template=pressure-test.yml):**
    merge, split, rewording, demotion of a MUST to a SHOULD. Describe the problem before proposing a solution.
+4.
+**[Add a tool to the registry](https://github.com/brettdavies/agentnative-cli/issues/new?template=add-tool-to-registry.yml):**
+propose a CLI for inclusion on the anc.dev/scorecards leaderboard. Include the install command, the source repo, and
+(optionally) the result of a fresh `anc check` run.
 
 For full routing guidance, see the spec repo's
 [CONTRIBUTING.md](https://github.com/brettdavies/agentnative/blob/main/CONTRIBUTING.md).
diff --git a/content/badge.md b/content/badge.md
index 4168d9e..412ed2d 100644
--- a/content/badge.md
+++ b/content/badge.md
@@ -10,7 +10,7 @@ when a tool's score changes.
 ## What the badge looks like
 
 The badge is rendered at build time and served as a static SVG. The site does not use shields.io, has no third-party
-render dependency, and requires no account — the renderer is [`badge-maker`](https://www.npmjs.com/package/badge-maker),
+render dependency, and requires no account. The renderer is [`badge-maker`](https://www.npmjs.com/package/badge-maker),
 the same library shields.io uses internally. Visually identical output, fully self-hosted.
 
 For a tool named `<tool>`:
@@ -28,24 +28,24 @@ percent score; the color tracks the same green / yellow / red bands the [leaderb
 [![agent-native](https://anc.dev/badge/<tool>.svg)](https://anc.dev/score/<tool>)
 ```
 
-Replace `<tool>` with the tool's slug — the same name that appears on the leaderboard and in the registry. The badge
+Replace `<tool>` with the tool's slug: the same name that appears on the leaderboard and in the registry. The badge
 links to the tool's per-tool scorecard page so a reader who clicks lands on the live evidence.
 
 Per-tool scorecard pages whose tool clears the eligibility floor render this snippet inline, ready to copy.
 
-## Eligibility — the floor
+## Eligibility: the floor
 
 A tool may legitimately embed the badge when its score is **80% or higher**.
 
 The floor is the brightline at the top quartile of the launch corpus. It captures tools that took agent-readiness
-seriously, not tools that scored marginally. A tool below the floor can still link to its scorecard page — that is the
-public-by-default posture of the standard — but should not embed the badge as a quality signal until it clears 80%.
+seriously, not tools that scored marginally. A tool below the floor can still link to its scorecard page (that is the
+public-by-default posture of the standard), but should not embed the badge as a quality signal until it clears 80%.
 
 The floor is enforced by the per-tool scorecard page, not by the SVG endpoint. The SVG is rendered for every scored tool
 regardless of score. This is intentional: a tool that already embedded the badge should see the visual color shift if
 its score regresses, not a 404.
 
-## Score format — `XX%`
+## Score format: `XX%`
 
 The score on the badge is the same pass-rate the leaderboard reports: `pass / (pass + warn + fail)`, rounded to the
 nearest integer percent.
@@ -57,13 +57,13 @@ matches the leaderboard's score column so a reader sees the same number across s
 
 | Range     | Color       | What it means                                                |
 | --------- | ----------- | ------------------------------------------------------------ |
-| 80–100%   | brightgreen | Eligible — meets or exceeds the badge floor                  |
+| 80–100%   | brightgreen | Eligible; meets or exceeds the badge floor                   |
 | 60–79%    | yellow      | Decent agent-readiness with meaningful gaps                  |
 | Below 60% | red         | Significant gaps; the per-tool page lists the failing checks |
 
 Tools below the floor still receive a rendered SVG so an embedded badge stays honest after a regression.
 
-## Version pinning — the URL is always-latest, the label cites the spec
+## Version pinning: URL always-latest, label cites the spec
 
 The URL `/badge/<tool>.svg` always reflects the tool's most recent score against the most recent published spec. The
 spec version baseline is carried in the badge **label** (e.g., `agent-native v0.3`), not in the URL.
@@ -87,7 +87,7 @@ In practice this means:
 - The scorecard must be a real `anc check --output json` run, committed under
   [`scorecards/`](https://github.com/brettdavies/agentnative-site/tree/main/scorecards).
 - Anyone reading the badge can run `anc check --command <binary>` locally and arrive at the same number, modulo
-  scorecard-staleness — see the regression policy below.
+  scorecard-staleness. See the regression policy below.
 
 If the live re-run produces a different score than the badge, the live re-run wins. The badge is a pointer, not an
 authority.
@@ -104,7 +104,7 @@ your work continuously, not a one-time award.
 
 ## Claiming the badge
 
-1. Get on the [leaderboard](/scorecards) — file a registry entry per
+1. Get on the [leaderboard](/scorecards): file a registry entry per
    [the registry README](https://github.com/brettdavies/agentnative-site/blob/main/registry.yaml). The site
    auto-discovers the latest scorecard for each registry entry on every build.
 2. Run `anc check --command <binary> --output json > scorecards/<tool>-v<version>.json` and commit the result.
@@ -115,7 +115,7 @@ That is the whole flow. The convention is intentionally narrow.
 
 ## Related
 
-- [Methodology](/methodology) — how scores are computed and what the audience signal does and does not claim
-- [Scorecard schema](/scorecard-schema) — the shape of the underlying JSON
-- [Leaderboard](/scorecards) — every scored tool, sortable
-- [Install `anc`](/install) — the CLI that produces scorecards
+- [Methodology](/methodology): how scores are computed and what the audience signal does and does not claim
+- [Scorecard schema](/scorecard-schema): the shape of the underlying JSON
+- [Leaderboard](/scorecards): every scored tool, sortable
+- [Install `anc`](/install): the CLI that produces scorecards
diff --git a/content/check.md b/content/check.md
index 302d80a..3b5cba9 100644
--- a/content/check.md
+++ b/content/check.md
@@ -1,8 +1,7 @@
 # Check your CLI
 
-`anc` (also installable as `agentnative` — they're aliases for the same binary) is the reference linter for this
-standard. It scores any CLI tool against the eight principles and tells you, by check ID, where it passes and where it
-falls short.
+`anc` is the reference linter for this standard. It scores any CLI tool against the eight principles and tells you, by
+check ID, where it passes and where it falls short.
 
 ## Install
 
@@ -52,12 +51,12 @@ issues, commits, and agent output; they do not change between versions.
 
 ## Three check layers
 
-- **Behavioral** — runs your compiled binary and inspects `--help`, `--version`, `--output json`, SIGPIPE, NO_COLOR, and
+- **Behavioral**: runs your compiled binary and inspects `--help`, `--version`, `--output json`, SIGPIPE, NO_COLOR, and
   exit codes. Language-agnostic.
-- **Source** — ast-grep pattern matching on source code. Catches `.unwrap()`, missing error types, naked `println!`.
-  Rust and Python today; more languages as they land.
-- **Project** — file and manifest inspection. Looks for `AGENTS.md`, recommended dependencies, dedicated error and
-  output modules.
+- **Source**: ast-grep pattern matching on source code. Catches `.unwrap()`, missing error types, naked `println!`. Rust
+  and Python today; more languages as they land.
+- **Project**: file and manifest inspection. Looks for `AGENTS.md`, recommended dependencies, dedicated error and output
+  modules.
 
 Pass `--binary` for behavioral-only (skip source). Pass `--source` for source-only (skip behavioral). Most projects want
 the default, which is "run everything."
@@ -65,7 +64,7 @@ the default, which is "run everything."
 ## What a score means
 
 A `[PASS]` is a requirement met, not a compliment. A `[WARN]` is a SHOULD the tool doesn't satisfy; ignoring it is a
-choice, not a bug. A `[FAIL]` is a MUST the tool doesn't satisfy; agents will hit the edge it describes and the tool
+choice, not a bug. A `[FAIL]` is a MUST the tool doesn't satisfy; agents will hit the edge it describes, and the tool
 will surprise them. Nothing here is a vanity metric — the checks map one-to-one to the requirements on the
 [principles page](/).
 
diff --git a/content/contribute.md b/content/contribute.md
new file mode 100644
index 0000000..537f5d9
--- /dev/null
+++ b/content/contribute.md
@@ -0,0 +1,98 @@
+# Contribute
+
+The agent-native CLI spec is `status: active` because the contracts are stable enough to cite, not because anything is
+locked. The pressure-test mechanism is how the spec revises a position when a finding warrants it. This page is the
+navigation across the four repos that make up the project, plus the honest expectations on response time.
+
+## What kinds of contribution are welcome
+
+Three tiers, all welcome, none required. The shape of the contribution determines the intake.
+
+| Tier            | What                                                                                                                                                                                                         | Where                                                              | Time     |
+| --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------ | -------- |
+| **1. Signal**   | A finding against a principle's wording, a missing citation, a contradiction between two principles, a false positive in `anc`, a broken link on the site, a bundle content issue                            | A repo-specific issue template (see "Per-repo intake" below)       | ~5 min   |
+| **2. Proposal** | A new principle the spec is missing, a MUST/SHOULD tier change with rationale, a counter-example that breaks an applicability clause, a new language checker design, a new host runtime for the skill bundle | An issue with the full case in the body, against the relevant repo | ~1-2 hrs |
+| **3. Code**     | A new language checker for `anc`, a tool scoring submission for the leaderboard, a site or skill-bundle improvement, a governance or workflow PR                                                             | A pull request against the relevant repo's `dev` branch            | Variable |
+
+## Per-repo intake
+
+Each repo handles a different layer of the project. File against the one that matches the contribution's shape.
+
+### Spec: [agentnative](https://github.com/brettdavies/agentnative)
+
+The principle text, the requirement IDs, the versioning policy. Pressure-tests against the standard live here.
+
+- [Pressure-test a principle](https://github.com/brettdavies/agentnative/issues/new?template=pressure-test.yml) (Tier 1
+  or 2)
+- [Ask a spec question](https://github.com/brettdavies/agentnative/issues/new?template=spec-question.yml) (Tier 1)
+- [Submit a grading finding](https://github.com/brettdavies/agentnative/issues/new?template=grading-finding.yml) (Tier 1
+  or 2): spec-feedback derived from scoring a real CLI against the standard
+- [`CONTRIBUTING.md`](https://github.com/brettdavies/agentnative/blob/main/CONTRIBUTING.md) ·
+  [`principles/AGENTS.md` § Pressure-test protocol](https://github.com/brettdavies/agentnative/blob/main/principles/AGENTS.md#pressure-test-protocol)
+
+### Linter: [agentnative-cli](https://github.com/brettdavies/agentnative-cli)
+
+`anc`, the Rust linter that scores any repo against the spec. The scoring engine, the registry, the language checkers.
+
+- [Report a false positive](https://github.com/brettdavies/agentnative-cli/issues/new?template=false-positive.yml) (Tier
+  1)
+- [Request a feature](https://github.com/brettdavies/agentnative-cli/issues/new?template=feature-request.yml) (Tier 1 or
+  2)
+- [Report a scoring bug](https://github.com/brettdavies/agentnative-cli/issues/new?template=scoring-bug.yml) (Tier 1)
+- [Add a tool to the registry][add-tool] (Tier 3): propose a CLI for the anc.dev/scorecards leaderboard
+- [Source repo](https://github.com/brettdavies/agentnative-cli)
+
+[add-tool]: https://github.com/brettdavies/agentnative-cli/issues/new?template=add-tool-to-registry.yml
+
+### Site: [agentnative-site](https://github.com/brettdavies/agentnative-site)
+
+This site. The leaderboard renderer, the live-scoring loop, the per-tool scorecard pages, the Worker.
+
+- [File a site bug](https://github.com/brettdavies/agentnative-site/issues/new?template=site-bug.yml) (Tier 1)
+- [Source repo](https://github.com/brettdavies/agentnative-site)
+
+### Skill bundle: [agentnative-skill](https://github.com/brettdavies/agentnative-skill)
+
+The `agent-native-cli` bundle that agents discover via filesystem convention. The install paths, the host-runtime
+detection, the SKILL.md prose.
+
+- [Source repo + intake](https://github.com/brettdavies/agentnative-skill)
+
+## Response expectations (the honest part)
+
+This is a solo-maintainer project. The honest framing:
+
+- **Tier 1 and 2** are welcome and get a substantive reply when time allows. A pressure-test that names a specific
+  failure mode, with the reasoning behind it, is the contribution shape that lands fastest.
+- **Tier 3 PRs** are reviewed when scope and time permit. Real PRs land. No merge-window promise; the queue is what the
+  maintainer can actually read.
+- **Status flips** are how the spec records work in progress on a finding. A principle moves to `status: under-review`
+  when a substantive pressure-test is being processed, then back to `status: active` once the next MINOR release lands.
+  Visible in the principle file's frontmatter.
+
+The standard takes positions because positions are useful. Positions held without willingness to revise them are dogma.
+Both halves of that are intentional.
+
+## How the revision mechanism works
+
+For a Tier 2 proposal that changes a MUST/SHOULD/MAY tier or adds a new principle:
+
+1. The pressure-test issue lands with a specific finding: which requirement, which direction, what failure mode argues
+   for the change.
+2. If the finding is substantive, the relevant principle file's `status` flips from `active` to `under-review`.
+3. The next MINOR spec release resolves the finding: the prose is revised, `last-revised` updates, status returns to
+   `active`. Or the finding is closed with a documented `[wontfix]` rationale appended to the principle's pressure-test
+   notes section.
+
+Full description lives at
+[`principles/AGENTS.md` § Pressure-test protocol](https://github.com/brettdavies/agentnative/blob/main/principles/AGENTS.md#pressure-test-protocol).
+
+## Adjacent reading
+
+-
+  [Spec status lifecycle](https://github.com/brettdavies/agentnative/blob/main/principles/AGENTS.md#pressure-test-protocol)
+  — the `draft → under-review → active → locked` flow
+- [BRAND.md](https://github.com/brettdavies/agentnative/blob/main/BRAND.md): voice and identity
+- [CHANGELOG.md](https://github.com/brettdavies/agentnative/blob/main/CHANGELOG.md): what landed when
+
+The leaderboard at [`/scorecards`](/scorecards) is the running answer to "what does the spec catch in practice."
diff --git a/content/install.md b/content/install.md
index db13722..2c7bf6e 100644
--- a/content/install.md
+++ b/content/install.md
@@ -1,8 +1,8 @@
-# Install agentnative
+# Install anc
 
-`anc` (also installable as `agentnative` — the binary ships under both names) is the reference linter for the
-agent-native CLI standard. It scores any CLI tool against the eight principles and tells you, by check ID, where it
-passes and where it falls short. Install it locally, then point it at a binary or a project directory.
+`anc` is the reference linter for the agent-native CLI standard. It scores any CLI against the eight principles and
+tells you, by check ID, where it passes and where it falls short. Install it locally, then point it at a binary or a
+project directory.
 
 ## Homebrew
 
@@ -25,7 +25,7 @@ cargo install agentnative
 ```
 
 For a prebuilt binary without compiling from source (requires
-[`cargo-binstall`](https://github.com/cargo-bins/cargo-binstall) — skip if you don't already have it):
+[`cargo-binstall`](https://github.com/cargo-bins/cargo-binstall); skip if you don't already have it):
 
 ```bash
 cargo binstall agentnative
@@ -33,15 +33,15 @@ cargo binstall agentnative
 
 ## GitHub Releases
 
-Platform archives — including Windows builds and SHA256 checksums — live at
+Platform archives, including Windows builds and SHA256 checksums, live at
 [github.com/brettdavies/agentnative-cli/releases](https://github.com/brettdavies/agentnative-cli/releases). Download the
 archive for your platform, extract, and put the `anc` binary on `$PATH`.
 
 ## What's next
 
-Once installed, the CLI is invoked as `anc`. See [/check](/check) for usage — flags, output shapes, and how to interpret
-the per-principle check IDs. The principles themselves are spelled out at [/](/), with one page per principle (`/p1`
-through `/p8`).
+Once installed, invoke the CLI as `anc`. See [/check](/check) for usage: flags, output shapes, and how to interpret the
+per-principle check IDs. The principles themselves are spelled out at [/](/), with one page per principle (`/p1` through
+`/p8`).
 
-To install the **agent-native-cli skill bundle** instead — the Claude Code / Codex / Cursor / OpenCode skill that
-teaches an agent to write CLIs against this standard — see [/skill](/skill).
+To install the **agent-native-cli skill bundle** instead (the Claude Code / Codex / Cursor / OpenCode skill that teaches
+an agent to write CLIs against this standard), see [/skill](/skill).
diff --git a/content/methodology.md b/content/methodology.md
index a8840e7..a773642 100644
--- a/content/methodology.md
+++ b/content/methodology.md
@@ -14,20 +14,20 @@ which tools are in the set.
 Adding a tool means filing a registry entry. Removing a tool means filing a registry deletion. There is no other
 inclusion criterion.
 
-### Contributor flow — registry PR and scorecard PR may land in either order
+### Contributor flow: registry PR and scorecard PR may land in either order
 
 A tool needs two artifacts to appear on the leaderboard: a registry entry (`registry.yaml`) and a scorecard
 (`scorecards/<name>-v<version>.json`). The build accepts these in either order:
 
-- **Editorial-PR-first.** A registry entry without a matching scorecard is a "registry orphan" — the build emits a
+- **Editorial-PR-first.** A registry entry without a matching scorecard is a "registry orphan": the build emits a
   warning and excludes the entry from the leaderboard until a scorecard PR lands. This is the expected steady-state for
   a freshly-nominated tool.
-- **Scorecard-PR-first.** A scorecard whose filename slug has no registry entry is a "scorecard orphan" — the build
-  emits the symmetric warning and excludes the scorecard from the leaderboard until the editorial PR lands.
+- **Scorecard-PR-first.** A scorecard whose filename slug has no registry entry is a "scorecard orphan": the build emits
+  the symmetric warning and excludes the scorecard from the leaderboard until the editorial PR lands.
 
 Both directions surface as a structured CI annotation on the PR (`WARNINGS_JSON: { scorecardOrphans, registryOrphans }`)
-so reviewers see drift without grepping logs. The build still passes in either orphaned state — the warning is the
-nudge, not a blocker. Once both halves land, the tool appears on the leaderboard at the next deploy.
+so reviewers see drift without grepping logs. The build still passes in either orphaned state; the warning is the nudge,
+not a blocker. Once both halves land, the tool appears on the leaderboard at the next deploy.
 
 ## How a score is computed
 
@@ -45,27 +45,27 @@ Neither is evidence of a defect, so neither moves the score.
 headline number. Per RFC 2119 those are categorically different — a `fail` means non-conformance with the standard; a
 `warn` means a missed default. The headline is a deliberate simplification chosen so a single number is comparable
 across tools; the **principles met** column is where conformance lives. A tool with one `fail` and zero `warn` will
-score higher than a tool with zero `fail` and three `warn`, but only the first tool is non-conformant — read both
-columns together. The per-tool page is the ground truth.
+score higher than a tool with zero `fail` and three `warn`, but only the first tool is non-conformant. Read both columns
+together. The per-tool page is the ground truth.
 
-The **principles met** column counts how many of the eight principles (P1–P8) have *all* their checks passing — no
+The **principles met** column counts how many of the eight principles (P1–P8) have *all* their checks passing: no
 warnings, no failures. A tool can have a 90% pass rate and still meet only four of eight principles, if the warnings
 cluster inside three principle groups. Both numbers are surfaced because either, alone, hides the shape of the result.
 
-Bonus checks — `CodeQuality` and `ProjectStructure` — are listed on each tool's page but not blended into the primary
+Bonus checks (`CodeQuality` and `ProjectStructure`) are listed on each tool's page but not blended into the primary
 score. They are language-specific and would create unfair comparisons across tools.
 
 ## What the audience signal is, and is not
 
 `anc` v0.1.3+ classifies each scored tool as one of:
 
-- `agent-optimized` — the four signal checks (P1 non-interactive, P2 JSON output, P6 NO_COLOR, P7 quiet) all pass or
-  warn at most once. (One warn allowance reflects the reality that the four signal checks are correlated — a
-  near-conformant tool may miss on one edge, e.g., honoring `NO_COLOR` but not `NO_COLOR=0`; requiring zero warns would
-  over-penalize otherwise-conformant tools.)
-- `mixed` — two of the four signal checks warn.
-- `human-primary` — three or more of the four signal checks warn.
-- `null` with `audience_reason: "suppressed"` — when the active audit profile suppresses one or more of the four signal
+- `agent-optimized`: the four signal checks (P1 non-interactive, P2 JSON output, P6 NO_COLOR, P7 quiet) all pass or warn
+  at most once. (One warn allowance reflects the reality that the four signal checks are correlated; a near-conformant
+  tool may miss on one edge, e.g., honoring `NO_COLOR` but not `NO_COLOR=0`. Requiring zero warns would over-penalize
+  otherwise-conformant tools.)
+- `mixed`: two of the four signal checks warn.
+- `human-primary`: three or more of the four signal checks warn.
+- `null` with `audience_reason: "suppressed"`: when the active audit profile suppresses one or more of the four signal
   checks, the classifier has insufficient input and refuses to label. The per-tool page surfaces the reason so a reader
   can see *why* the field is empty rather than guessing.
 
@@ -74,7 +74,7 @@ behavioral checks. The per-check evidence shown alongside is the ground truth. A
 be safe to use from an agent in narrow, well-bounded ways. A tool labeled `agent-optimized` may still surprise an agent
 on a check the classifier does not look at.
 
-When the classifier disagrees with intuition — for example, a tool you consider agent-hostile gets `agent-optimized` —
+When the classifier disagrees with intuition (for example, a tool you consider agent-hostile gets `agent-optimized`),
 the fix lives in one of two places:
 
 1. The tool fits an exception category that should suppress some checks → file a registry update adding an
@@ -87,8 +87,8 @@ Patching the *site* to override a CLI verdict is never the answer. The site rend
 ## Audit profiles: scoping the standard to a tool's category
 
 Some tools intentionally do not satisfy parts of the standard because the standard does not apply to their category.
-Lazygit is interactive on purpose — it is a TUI. `find` does not emit JSON because POSIX utilities don't. Holding these
-tools to checks that punish their core design produces a misleading score and a hostile leaderboard.
+Lazygit is interactive on purpose because it is a TUI. `find` does not emit JSON because POSIX utilities don't. Holding
+these tools to checks that punish their core design produces a misleading score and a hostile leaderboard.
 
 `anc` v0.1.3 exposes four exception categories via `--audit-profile`. The exact suppression set lives in
 [`SUPPRESSION_TABLE`](https://github.com/brettdavies/agentnative-cli/blob/main/src/principles/registry.rs) in the CLI
@@ -112,12 +112,12 @@ its work.
 
 | Tool        | Profile          | Why                                                                                 |
 | ----------- | ---------------- | ----------------------------------------------------------------------------------- |
-| `lazygit`   | `human-tui`      | Git TUI — primary mode is full-screen interactive UI                                |
-| `gitui`     | `human-tui`      | Git TUI — parallel project to lazygit                                               |
-| `tmux`      | `human-tui`      | Terminal multiplexer — bare invocation attaches/starts an interactive session       |
+| `lazygit`   | `human-tui`      | Git TUI - primary mode is full-screen interactive UI                                |
+| `gitui`     | `human-tui`      | Git TUI - parallel project to lazygit                                               |
+| `tmux`      | `human-tui`      | Terminal multiplexer - bare invocation attaches/starts an interactive session       |
 | `fzf`       | `human-tui`      | Interactive fuzzy-match picker over stdin                                           |
 | `broot`     | `human-tui`      | Interactive directory-tree browser                                                  |
-| `yazi`      | `human-tui`      | Interactive file manager — full-screen browse is the primary mode                   |
+| `yazi`      | `human-tui`      | Interactive file manager - full-screen browse is the primary mode                   |
 | `bottom`    | `human-tui`      | Interactive process/system monitor (htop-class)                                     |
 | `bandwhich` | `human-tui`      | Interactive network bandwidth monitor                                               |
 | `atuin`     | `human-tui`      | Interactive shell-history search; bare-binary mode and `atuin search` are TUI-first |
@@ -127,25 +127,25 @@ its work.
 
 Profiles **not** currently applied to any tool, with the criteria a future entry must meet:
 
-- `posix-utility` — Tool predates structured output and follows POSIX-style stdin/stdout conventions. Modern stream
+- `posix-utility`: Tool predates structured output and follows POSIX-style stdin/stdout conventions. Modern stream
   processors (`jq`, `yq`, `dasel`, `miller`, etc.) already pass P1 non-interactive checks vacuously, so the suppression
   is unnecessary and `posix-utility` is not applied.
-- `diagnostic-only` — Tool can never mutate state by design. Suppresses only P5 dry-run. The current registry's
-  read-only candidates (`procs`, `dust`, `tree`) all pass P5 already, so the profile would be a no-op annotation. It
-  will become useful when P5 grows checks beyond dry-run that warrant skipping for read-only diagnostics.
+- `diagnostic-only`: Tool can never mutate state by design. Suppresses only P5 dry-run. The current registry's read-only
+  candidates (`procs`, `dust`, `tree`) all pass P5 already, so the profile would be a no-op annotation. It will become
+  useful when P5 grows checks beyond dry-run that warrant skipping for read-only diagnostics.
 
 The general rule for adding a profile: **apply it only when an unsuppressed check is fighting the tool's category, not
 its design quality**. A TUI legitimately blocks on a TTY; that's a category fact, not a defect. A CLI that *could* be
-non-interactive but isn't is a defect — no profile applies.
+non-interactive but isn't is a defect; no profile applies.
 
 ## Layers: behavioral, project, source
 
 `anc` runs three layers of checks:
 
-- **Behavioral** — invokes the binary, inspects `--help`, `--version`, `--output json`, SIGPIPE, NO_COLOR, exit codes.
+- **Behavioral**: invokes the binary, inspects `--help`, `--version`, `--output json`, SIGPIPE, NO_COLOR, exit codes.
   Language-agnostic. Every tool on the leaderboard is scored at this layer.
-- **Project** — inspects the project tree: `AGENTS.md`, manifest files, recommended dependencies. Language-agnostic.
-- **Source** — runs ast-grep patterns against source code. Catches `unwrap()`, naked `println!`, missing error types.
+- **Project**: inspects the project tree: `AGENTS.md`, manifest files, recommended dependencies. Language-agnostic.
+- **Source**: runs ast-grep patterns against source code. Catches `unwrap()`, naked `println!`, missing error types.
   Rust + Python today; more languages as they ship.
 
 The headline score combines behavioral and project. Source-layer results, when available, are reported separately on the
@@ -170,10 +170,10 @@ Re-scoring is manual at launch. When a tool ships a release that changes its age
 
 - File an issue on [`agentnative-site`](https://github.com/brettdavies/agentnative-site/issues/new) titled `re-score:
   <tool>` and link the release notes. The committed scorecard will be regenerated against the new version.
-- If a tool's category is misclassified — e.g., a TUI is being scored as a general-purpose CLI — file an issue titled
+- If a tool's category is misclassified (e.g., a TUI is being scored as a general-purpose CLI), file an issue titled
   `audit-profile: <tool> <category>` with the rationale. Audit-profile changes are registry edits; they ship with the
   next site deploy.
-- If a check itself is wrong — false positives, weak signal, missing edge case — file the issue against the
+- If a check itself is wrong (false positives, weak signal, missing edge case), file the issue against the
   [`agentnative` CLI](https://github.com/brettdavies/agentnative-cli/issues), not this site. Site renders; CLI judges.
 
 ## Constructive framing
diff --git a/content/scorecard-schema.md b/content/scorecard-schema.md
index 50f71db..fb441a9 100644
--- a/content/scorecard-schema.md
+++ b/content/scorecard-schema.md
@@ -15,7 +15,7 @@ scorecards/<name>-v<version>.json
 ```
 
 Where `<name>` matches the registry's `name` field (URL slug) and `<version>` is the SemVer string captured at scoring
-time. The filename's `<version>` segment is the **canonical version anchor** — the site reads it directly off disk and
+time. The filename's `<version>` segment is the **canonical version anchor**: the site reads it directly off disk and
 displays it as the scored version on every per-tool page. The scorecard's `tool.version` field (added in schema 0.4) is
 informational; when both are present and disagree, the build aborts with an integrity error. The filename never lies.
 
@@ -67,14 +67,14 @@ registry.
 }
 ```
 
-| Field     | Type           | Meaning                                                                                                                                                                                                                                                                                                                                                            |
-| --------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| `name`    | string         | The literal `--command` argv passed to anc. For tools where the registry name differs from the binary (e.g., registry `ripgrep` → binary `rg`), this is the binary, not the registry slug. The filename slug owns the registry-name side of the join.                                                                                                              |
-| `binary`  | string         | Executable name resolved from `$PATH` at scoring time. Equals `tool.name` for command-mode runs except when a tool ships under an alias.                                                                                                                                                                                                                           |
-| `version` | string \| null | Best-effort version string. The CLI dumps the first line of `<binary> --version` here without further parsing — it may carry the marketing string ("eza - A modern, maintained replacement for ls"), the full multi-line block, or `null` when the binary doesn't print anything parseable. **The filename's `<version>` is canonical**; this field is a courtesy. |
+| Field     | Type           | Meaning                                                                                                                                                                                                                                                                                                                                                           |
+| --------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `name`    | string         | The literal `--command` argv passed to anc. For tools where the registry name differs from the binary (e.g., registry `ripgrep` → binary `rg`), this is the binary, not the registry slug. The filename slug owns the registry-name side of the join.                                                                                                             |
+| `binary`  | string         | Executable name resolved from `$PATH` at scoring time. Equals `tool.name` for command-mode runs except when a tool ships under an alias.                                                                                                                                                                                                                          |
+| `version` | string \| null | Best-effort version string. The CLI dumps the first line of `<binary> --version` here without further parsing; it may carry the marketing string ("eza - A modern, maintained replacement for ls"), the full multi-line block, or `null` when the binary doesn't print anything parseable. **The filename's `<version>` is canonical**; this field is a courtesy. |
 
 **Build-time invariant:** when `tool.version` contains a SemVer-shaped token (`X.Y` or `X.Y.Z`), it must equal the
-filename version. Drift fails the build with a parser-asymmetry error — the regen script's `version_extract` snippet and
+filename version. Drift fails the build with a parser-asymmetry error: the regen script's `version_extract` snippet and
 the CLI's internal probe are the only two places that derive a version from the binary, and they must agree.
 
 ## `anc`
@@ -114,7 +114,7 @@ Run-context: the literal invocation, when it ran, how long it took, and what pla
 | `platform.os`   | string  | OS the binary ran on (`linux`, `darwin`, `windows`).                                                                                                         |
 | `platform.arch` | string  | CPU architecture the binary ran on (`x86_64`, `aarch64`, …).                                                                                                 |
 
-**Security note — `run.invocation`:** for command-mode runs the invocation is the canonical `anc check --command <name>
+**Security note (`run.invocation`):** for command-mode runs the invocation is the canonical `anc check --command <name>
 [--audit-profile <X>] [--output json]` shape, which is safe to embed publicly. For project-mode runs (`target.kind:
 "project"`) the invocation may include a local filesystem path (`anc check ./local/repo`); the site falls back to the
 synthesized form for those runs to avoid leaking machine-local paths into HTML, markdown, and `/llms-full.txt`. Mirror
@@ -138,7 +138,7 @@ What the run was scoring: a command, a binary on disk, or a project tree.
 | `path`    | string \| null | Filesystem path when `kind` is `project` or `binary`; `null` for `command`-mode runs.                                                                                                                                                                      |
 | `command` | string \| null | The `--command` argv string when `kind` is `command`; `null` otherwise. For command-mode runs this equals `tool.name`.                                                                                                                                     |
 
-**Security note — `target.path`:** when `kind` is `project`, this can carry a local directory path (`/home/me/dev/foo`).
+**Security note (`target.path`):** when `kind` is `project`, this can carry a local directory path (`/home/me/dev/foo`).
 It is not currently rendered on any per-tool page (every leaderboard entry today is command-mode), but downstream
 consumers reading the JSON should treat it as machine-local.
 
@@ -161,12 +161,12 @@ Counts of the checks the runner actually executed. Adds up to `total`.
 | ------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------- |
 | `total` | integer | Number of checks the runner attempted on this tool. Equals `pass + warn + fail + skip + error`.                                                   |
 | `pass`  | integer | Checks that succeeded with no concerns.                                                                                                           |
-| `warn`  | integer | Checks that found a soft signal — partial compliance, deprecated pattern, mild inconsistency.                                                     |
+| `warn`  | integer | Checks that found a soft signal: partial compliance, deprecated pattern, mild inconsistency.                                                      |
 | `fail`  | integer | Checks that found a clear non-compliance.                                                                                                         |
 | `skip`  | integer | Checks the runner correctly judged inapplicable. Either the tool's shape made the check meaningless, or the active `audit_profile` suppressed it. |
 | `error` | integer | The check itself crashed and produced no signal. Not evidence of a defect; not blended into the score.                                            |
 
-The headline score on the leaderboard is `pass / (pass + warn + fail)` — `skip` and `error` are excluded from the
+The headline score on the leaderboard is `pass / (pass + warn + fail)`; `skip` and `error` are excluded from the
 denominator on purpose, as documented on the [methodology page](/methodology#how-a-score-is-computed).
 
 ## `coverage_summary`
@@ -192,7 +192,7 @@ by any implemented check.
 | `may.total`       | integer | Number of MAY-tier requirements in the spec.                                                          |
 | `may.verified`    | integer | MAYs satisfied by passing checks for this tool.                                                       |
 
-If `coverage_summary.must.verified` is below `summary.pass`, that's expected — a single passing check can map to
+If `coverage_summary.must.verified` is below `summary.pass`, that's expected because a single passing check can map to
 multiple MUSTs. If `should.verified` and `may.verified` are zero across the board, that's also expected: those tiers are
 aspirational and will fill in as the runner grows checks mapped to them.
 
@@ -212,33 +212,33 @@ Array of one object per check the runner attempted. Order is stable across runs
 }
 ```
 
-| Field        | Type           | Meaning                                                                                                                                                  |
-| ------------ | -------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `id`         | string         | Stable identifier (e.g., `p3-help`, `p1-non-interactive`). Citeable in commits and PRs.                                                                  |
-| `label`      | string         | Human-readable name for the check.                                                                                                                       |
-| `group`      | string         | Principle group this check belongs to: `P1`–`P8`. Drives the **principles met** column on the leaderboard.                                               |
-| `layer`      | string         | `behavioral`, `project`, or `source`. See [layers](/methodology#layers-behavioral-project-source) on methodology.                                        |
-| `status`     | string         | `pass`, `warn`, `fail`, `skip`, or `error`. Definitions match the [`summary` table](#summary) above.                                                     |
-| `evidence`   | string \| null | Short explanation when status is `skip`, `warn`, or `fail`. Often references the suppressing audit profile or the input that triggered the check.        |
-| `confidence` | string         | `high`, `medium`, or `low`. Reflects how directly the check observed the property — direct flag presence is high; inference from `--help` text is lower. |
+| Field        | Type           | Meaning                                                                                                                                                 |
+| ------------ | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `id`         | string         | Stable identifier (e.g., `p3-help`, `p1-non-interactive`). Citeable in commits and PRs.                                                                 |
+| `label`      | string         | Human-readable name for the check.                                                                                                                      |
+| `group`      | string         | Principle group this check belongs to: `P1` through `P8`. Drives the **principles met** column on the leaderboard.                                      |
+| `layer`      | string         | `behavioral`, `project`, or `source`. See [layers](/methodology#layers-behavioral-project-source) on methodology.                                       |
+| `status`     | string         | `pass`, `warn`, `fail`, `skip`, or `error`. Definitions match the [`summary` table](#summary) above.                                                    |
+| `evidence`   | string \| null | Short explanation when status is `skip`, `warn`, or `fail`. Often references the suppressing audit profile or the input that triggered the check.       |
+| `confidence` | string         | `high`, `medium`, or `low`. Reflects how directly the check observed the property: direct flag presence is high; inference from `--help` text is lower. |
 
 ### `status` semantics in detail
 
-- `pass` — Check ran, found no issue.
-- `warn` — Check ran, found a soft signal worth noting. Lowers the headline score.
-- `fail` — Check ran, found a hard non-compliance. Lowers the headline score.
-- `skip` — Check did not run, by design. Either the tool's surface made it inapplicable (e.g., a `--help` parser check
-  on a tool with no flags) or the active `audit_profile` suppressed it (`evidence` will name the profile).
-- `error` — Check tried to run and crashed before producing a verdict. Treated as no-signal, not a defect.
+- `pass`: Check ran, found no issue.
+- `warn`: Check ran, found a soft signal worth noting. Lowers the headline score.
+- `fail`: Check ran, found a hard non-compliance. Lowers the headline score.
+- `skip`: Check did not run, by design. Either the tool's surface made it inapplicable (e.g., a `--help` parser check on
+  a tool with no flags) or the active `audit_profile` suppressed it (`evidence` will name the profile).
+- `error`: Check tried to run and crashed before producing a verdict. Treated as no-signal, not a defect.
 
 ## What is *not* in the scorecard (yet)
 
 The site is transparent about gaps that future schema bumps may fill. Schema 0.4 closed the tool-identity / generated-at
 gap (see `tool`, `anc`, `run`, `target` above). Still outstanding today:
 
-- **Per-check timing** — `run.duration_ms` is the wall-clock total for the run, not per-check. Individual check timings
+- **Per-check timing.** `run.duration_ms` is the wall-clock total for the run, not per-check. Individual check timings
   are observable from the runner's stdout but not captured in the JSON.
-- **Editorial fields inside the scorecard** — tier, language, creator, description, install, repo/url remain in the
+- **Editorial fields inside the scorecard.** Tier, language, creator, description, install, repo/url remain in the
   registry. Migrating them into the scorecard would let the registry shrink to a name list; deferred to a future schema
   bump.
 
diff --git a/docker/sandbox/Dockerfile b/docker/sandbox/Dockerfile
index 9f37a60..98e1050 100644
--- a/docker/sandbox/Dockerfile
+++ b/docker/sandbox/Dockerfile
@@ -1,95 +1,166 @@
 # syntax=docker/dockerfile:1.7
 #
-# Live-scoring sandbox image (plan U2).
+# Live-scoring sandbox image. Carries the CF Sandbox SDK server plus
+# package managers (cargo-binstall, pip, uv, npm, bun, go runtime) and
+# a pre-built `anc` binary. No compilers — install paths must produce
+# precompiled binaries or wheels.
 #
-# Goal: Alpine + musl image carrying CF Sandbox 0.9.x-musl, package managers
-# (cargo-binstall, pip, npm, go runtime), and a pre-built musl `anc` baked
-# in from agentnative-cli v0.3.1. NO COMPILERS, NO TOOLCHAINS — install
-# paths are: cargo binstall (precompiled), pip (wheels only), npm
-# (--ignore-scripts), go install (downloads a precompiled artifact in
-# practice for most modules; modules that build from source will fail
-# install at U6 and bounce). Per Premise #2 of the 2026-04-17 CEO design.
+# brew is NOT installed; `brew install <pkg>` user inputs are translated
+# to an alternative PM via the discovery fallback in
+# src/worker/score/do.ts:resolveSpec(). Brew-only formulae bounce as
+# install_unsupported with pm=brew_only.
 #
-# Image budget: <=350 MB compressed. Fits CF Containers `basic` instance
-# type (1 GiB RAM / 4 GB disk). Verify locally:
+# Python version: the FINAL base is `python:3.12-slim-trixie` rather than
+# the bare `debian:trixie-slim` because aider-chat (and many other
+# Python-ecosystem tools) constrain Python to `<3.13,>=3.10`. Debian
+# Trixie defaults to Python 3.13, which makes pip's resolver back off to
+# ancient package versions trying to find a compatible wheel — see
+# docs/solutions/tooling-decisions/cloudflare-sandbox-python-3.12-base-2026-05-19.md
+# for the full investigation. Otherwise the environment is identical
+# Trixie (same glibc, same apt repos, same Node 22, same anc/bun/uv/go
+# binary versions).
+#
+# Image budget: <=350 MB compressed. Verify locally:
 #   docker build -f docker/sandbox/Dockerfile -t anc-sandbox .
 #   docker image inspect anc-sandbox --format '{{.Size}}'
-#
-# Two-phase egress (R7) is enforced at the Worker / DO layer in U6; this
-# image just provides the install + run substrate. PATH covers every
-# location a binary might land after install via the supported pms.
-#
-# brew is intentionally OMITTED. Linuxbrew on Alpine + musl is not a
-# supported configuration (linuxbrew assumes glibc symbols). User inputs
-# that resolve to `pm: brew` via U4's chain hit U6's
-# `chain_resolved_install_failed` bounce class and surface the
-# install-anc-locally CTA. See plan U2 lines 945-988 for the rationale.
 
-# ----------------------------------------------------------------------
-# Stage 1: pull the CF Sandbox SDK server binary + its glibcxx dependency.
-# Pinned to a digest so a forced re-tag of `0.9.2-musl` upstream cannot
-# silently change what we're shipping.
-# ----------------------------------------------------------------------
-FROM docker.io/cloudflare/sandbox:0.9.2-musl@sha256:b4cb1d6996fa6be43bb4966bf3666db2d434f4d2a24dfb75e1d5e08194c5bd0a AS sandbox-base
+FROM docker.io/cloudflare/sandbox:0.9.2@sha256:3a8626db5d3d838d947abadd37b818e48628b17f12cf2e87d89b8e83a6676482 AS sandbox-base
 
-# ----------------------------------------------------------------------
-# Stage 2: Alpine 3.21 final image with pms + anc baked in.
 # Multi-arch index digest pinned; Docker resolves to the amd64 child at
-# pull time.
-# ----------------------------------------------------------------------
-FROM docker.io/library/alpine:3.21@sha256:48b0309ca019d89d40f670aa1bc06e426dc0931948452e8491e3d65087abc07d AS final
+# pull time. The official Python image installs CPython 3.12 + pip +
+# setuptools + wheel into /usr/local/, which lands ahead of /usr/bin on
+# PATH — so our existing `pip install` command picks up the 3.12 pip
+# without code changes. We DROP `python3` and `python3-pip` from the
+# apt install below because the base image already provides them.
+FROM docker.io/library/python:3.12-slim-trixie@sha256:bf73779de6dbd030f3d189eeeb246286965832761ace318c1518300f76c0840d AS final
 
-# Sandbox runtime: server binary + libstdc++ (the SDK server's only C++
-# runtime dep). If Cloudflare drops the libstdc++ dep in a future SDK
-# rev, the second COPY can be removed; the SDK server warns at startup
-# if the lib is missing.
 COPY --from=sandbox-base /container-server/sandbox /sandbox
-COPY --from=sandbox-base /usr/lib/libstdc++.so.6 /usr/lib/libstdc++.so.6
 
-# Package managers + interpreters. NO compilers (no build-base, no gcc,
-# no rust toolchain, no go build-tools — just the go runtime so `go
-# install` can download precompiled module artifacts).
-#
-# Why each:
-# - bash: cargo-binstall + npm install scripts assume bash for $(...)
-#   substitution; sh-only would surface as cryptic syntax errors.
-# - ca-certificates: TLS for U6 Phase 1 outbound to ecosystem registries.
-# - curl: U6 `direct` install path (curl ... | tar xz).
-# - git: `go install` / npm postinstall fallback resolution.
-# - go: `go install <module>@latest` requires the toolchain on PATH.
-# - nodejs + npm: `npm i -g --ignore-scripts <pkg>`.
-# - py3-pip + python3: `pip install --only-binary=:all: <pkg>`.
-RUN apk add --no-cache \
+# bzip2 / unzip / xz-utils back the per-extension dispatch in
+# sandbox-exec.ts directInstallCommand() for .tar.bz2 / .zip / .tar.xz
+# releases. libstdc++6 satisfies the sandbox-server runtime dep. Python
+# + pip are provided by the base image (see FROM line above) so neither
+# `python3` nor `python3-pip` is in this apt install list.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends \
         bash \
+        bzip2 \
         ca-certificates \
         curl \
         git \
-        go \
+        libstdc++6 \
         nodejs \
         npm \
-        py3-pip \
-        python3
+        unzip \
+        xz-utils \
+    && rm -rf /var/lib/apt/lists/*
+
+# Upstream Go (NOT Debian's golang-go). Debian builds Go with
+# CGO_ENABLED=0, which makes GODEBUG=netdns=cgo silently fall back to
+# Go's pure-Go resolver. The pure-Go resolver's IPv6 dual-stack
+# behavior hangs on CF Containers (outbound IPv6 is unreliable),
+# producing 60 s timeouts on `go install`. The official tarball ships
+# with CGO enabled, so cgo resolver works and honors /etc/gai.conf's
+# IPv4 precedence.
+RUN set -eux; \
+    curl -fsSL -o /tmp/go.tgz \
+        https://go.dev/dl/go1.26.3.linux-amd64.tar.gz; \
+    echo '2b2cfc7148493da5e73981bffbf3353af381d5f93e789c82c79aff64962eb556  /tmp/go.tgz' | sha256sum -c -; \
+    tar xzf /tmp/go.tgz -C /usr/local/; \
+    rm /tmp/go.tgz; \
+    /usr/local/go/bin/go version
+
+# Force IPv4 precedence in glibc's getaddrinfo. CF Containers' outbound
+# IPv6 path is unreliable for many upstream endpoints; tools whose HTTP
+# clients default to AAAA-first (cargo-binstall, go module proxy, curl,
+# uv) otherwise hang on the IPv6 attempt before falling back to A.
+# Python urllib3, npm, and bun happen to prefer A records and work
+# without this. Uncomment the IPv4-mapped precedence line in
+# /etc/gai.conf — glibc treats this as "rank IPv4 addresses above IPv6".
+RUN sed -i 's/^#\(precedence ::ffff:0:0\/96  100\)/\1/' /etc/gai.conf
 
-# cargo-binstall standalone (musl). Pinned URL + sha256.
 RUN set -eux; \
     curl -fsSL -o /tmp/cb.tgz \
-        https://github.com/cargo-bins/cargo-binstall/releases/download/v1.19.0/cargo-binstall-x86_64-unknown-linux-musl.full.tgz; \
-    echo '15432865784b4aea8749673e7faf0f2939207f73f58c6aca4ba1b6b1e536df6a  /tmp/cb.tgz' | sha256sum -c -; \
+        https://github.com/cargo-bins/cargo-binstall/releases/download/v1.19.0/cargo-binstall-x86_64-unknown-linux-gnu.full.tgz; \
+    echo '651492fa1a5a57a8d5d8de38556c3cbb464d40fa82eda124780a61373783c157  /tmp/cb.tgz' | sha256sum -c -; \
     tar xzf /tmp/cb.tgz -C /usr/local/bin/ cargo-binstall; \
     rm /tmp/cb.tgz; \
     cargo-binstall -V
 
-# `anc` musl bottle from agentnative-cli v0.3.1. Pinned URL + sha256
-# from the release's sha256sum.txt.
 RUN set -eux; \
     curl -fsSL -o /tmp/anc.tgz \
-        https://github.com/brettdavies/agentnative-cli/releases/download/v0.3.1/agentnative-x86_64-unknown-linux-musl.tar.gz; \
-    echo '90361f5bade856cfce3fc9e66703980f04f68e6f4fb86d48dfdb34f92e3b5785  /tmp/anc.tgz' | sha256sum -c -; \
+        https://github.com/brettdavies/agentnative-cli/releases/download/v0.4.0/agentnative-x86_64-unknown-linux-gnu.tar.gz; \
+    echo '67a5a41c68fd6852352b99e13ceb54bce5c043f45a936cab70f4e6d560a6e085  /tmp/anc.tgz' | sha256sum -c -; \
     tar xzf /tmp/anc.tgz -C /tmp/; \
-    install -m 0755 /tmp/agentnative-x86_64-unknown-linux-musl/anc /usr/local/bin/anc; \
-    rm -rf /tmp/anc.tgz /tmp/agentnative-x86_64-unknown-linux-musl; \
+    install -m 0755 /tmp/agentnative-x86_64-unknown-linux-gnu/anc /usr/local/bin/anc; \
+    rm -rf /tmp/anc.tgz /tmp/agentnative-x86_64-unknown-linux-gnu; \
     anc --version
 
-ENV PATH="/usr/local/bin:/usr/local/cargo/bin:/usr/local/go/bin:/root/.local/bin:${PATH}"
+# `unzip -j` flattens the archive's `bun-linux-x64/` directory so the
+# binary lands directly on PATH instead of in $HOME/.bun/bin.
+RUN set -eux; \
+    curl -fsSL -o /tmp/bun.zip \
+        https://github.com/oven-sh/bun/releases/download/bun-v1.3.14/bun-linux-x64.zip; \
+    echo '951ee2aee855f08595aeec6225226a298d3fea83a3dcd6465c09cbccdf7e848f  /tmp/bun.zip' | sha256sum -c -; \
+    unzip -j /tmp/bun.zip 'bun-linux-x64/bun' -d /usr/local/bin/; \
+    chmod 0755 /usr/local/bin/bun; \
+    rm /tmp/bun.zip; \
+    bun --version
+
+# `uv tool install <pkg>` symlinks the installed binary into
+# /root/.local/bin/<binary> (default UV_TOOL_BIN_DIR); the PATH below
+# covers that directory.
+RUN set -eux; \
+    curl -fsSL -o /tmp/uv.tgz \
+        https://github.com/astral-sh/uv/releases/download/0.11.15/uv-x86_64-unknown-linux-gnu.tar.gz; \
+    echo 'b03e572f010bea94a4a52d42671ba72981e12894f71576181a1d26ff68546da7  /tmp/uv.tgz' | sha256sum -c -; \
+    tar xzf /tmp/uv.tgz -C /tmp/; \
+    install -m 0755 /tmp/uv-x86_64-unknown-linux-gnu/uv /usr/local/bin/uv; \
+    install -m 0755 /tmp/uv-x86_64-unknown-linux-gnu/uvx /usr/local/bin/uvx; \
+    rm -rf /tmp/uv.tgz /tmp/uv-x86_64-unknown-linux-gnu; \
+    uv --version
+
+# Every PM's install destination redirected to /usr/local/bin so the
+# post-install `which <binary>` gate in sandbox-exec.ts always looks in
+# the same place. Mirrors the per-command flags
+# (cargo-binstall --install-path, GOBIN=...) for the ENV-configurable
+# package managers (bun, uv) so the convention is consistent across
+# the entire install table.
+ENV BUN_INSTALL=/usr/local
+ENV UV_TOOL_BIN_DIR=/usr/local/bin
+
+# Supply-chain release-delay gate: refuse to install any uv/uvx package
+# published less than 7 days ago. A malicious tag-overwrite or
+# fresh-publish attack has at minimum a 7-day detection window before
+# our sandbox would install it. uv handles relative durations natively
+# (v0.9.17+); pip's equivalent (PIP_UPLOADED_PRIOR_TO, v26.0+) requires
+# an absolute date, so the pip path computes it at exec time in
+# src/worker/score/sandbox-exec.ts rather than baking a stale value
+# here. npm + bun are not gated yet because user inputs through those
+# PMs pass through the registry / discovery hint vetting layer; pip and
+# uv take raw user-typed package names so the unmediated attack surface
+# is higher.
+ENV UV_EXCLUDE_NEWER="7 days"
+# Go's runtime DNS resolver bypasses glibc by default and ignores the
+# IPv4-precedence fix in /etc/gai.conf above. Force the cgo resolver so
+# `go install` honors the system address-selection policy.
+ENV GODEBUG=netdns=cgo
+# Suppress pip's "A new release of pip is available" notice. Without
+# this, every `pip install <pkg>` writes a multi-line upgrade prompt to
+# stderr, which pollutes the scorecard `evidence` field and the
+# bounce-panel `details` stderr block when a `pip install` user input
+# fails. The site never wants the user to act on the prompt (the
+# sandbox is ephemeral; pip is whatever the image ships), so the notice
+# is pure noise. Mirrored at exec time in sandbox-exec.ts so the
+# currently-deployed image gets the suppression before the next image
+# rebuild lands.
+ENV PIP_DISABLE_PIP_VERSION_CHECK=1
+
+ENV PATH="/usr/local/bin:/usr/local/cargo/bin:/usr/local/go/bin:${PATH}"
+
+# Port 3000 is reserved by the CF Sandbox SDK; pick a different port so
+# `wrangler dev --local` accepts the container binding without
+# colliding with the SDK's internal HTTP server.
+EXPOSE 8080
 
 ENTRYPOINT ["/sandbox"]
diff --git a/docker/sandbox/README.md b/docker/sandbox/README.md
index 3b887e5..c75e04b 100644
--- a/docker/sandbox/README.md
+++ b/docker/sandbox/README.md
@@ -1,12 +1,12 @@
 # Live-scoring sandbox image
 
-Alpine + musl image for the live-scoring path. Carries the Cloudflare Sandbox SDK server, package managers
-(cargo-binstall, pip, npm, go), and a pre-built musl `anc` baked in from agentnative-cli v0.3.1. NO COMPILERS, NO
-TOOLCHAINS.
+Debian-trixie-slim + glibc image for the live-scoring path. Carries the Cloudflare Sandbox SDK server, package managers
+(`cargo-binstall`, `pip`, `uv`, `npm`, `bun`, `go` runtime), and a pre-built `anc` binary from agentnative-cli v0.3.1.
+NO COMPILERS, NO TOOLCHAINS.
 
 Plan reference:
 [`docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md`](../../docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md)
-U2.
+U2 + U6.
 
 ## Build and push
 
@@ -59,12 +59,9 @@ Local smoke before pushing (optional but recommended on Dockerfile changes):
 docker run --rm "anc-sandbox:$GIT_SHA" /usr/local/bin/anc --version
 # expect: anc 0.3.1
 
-# cargo-binstall path
-docker run --rm "anc-sandbox:$GIT_SHA" /usr/local/bin/cargo-binstall --version
-
 # all expected pms on PATH
 docker run --rm "anc-sandbox:$GIT_SHA" sh -c \
-  'cargo-binstall --version && pip --version && npm --version && go version'
+  'cargo-binstall --version && pip --version && uv --version && npm --version && bun --version && go version'
 ```
 
 Image size against the budget (<=350 MB compressed; fits CF Containers `basic`):
@@ -101,7 +98,7 @@ survives.
 ### Build-context exclusions
 
 `docker/sandbox/.dockerignore` lists files that must not enter the build context. The current Dockerfile uses only
-multi-stage `COPY --from=` (no copy from the build context), so `.dockerignore` is forward-looking — it protects any
+multi-stage `COPY --from=` (no copy from the build context), so `.dockerignore` is forward-looking: it protects any
 future change that adds `COPY <ctx> ...` to the Dockerfile. `.ignored-sentinel.txt` is a regression probe: if it ever
 appears in a deployed layer, `.dockerignore` has stopped being read by the builder.
 
@@ -115,14 +112,16 @@ remotely, skipping push"`). This is a fallback; the primary path is the local bu
 
 ## SHA pinning
 
-Three external assets are pinned by sha256 inside the Dockerfile:
+Each external asset baked into the image is pinned by sha256 inside the Dockerfile:
 
-| Asset                                               | Pinned at                                             |
-| --------------------------------------------------- | ----------------------------------------------------- |
-| `cloudflare/sandbox:0.9.2-musl`                     | image digest from Docker Hub                          |
-| `alpine:3.21`                                       | multi-arch index digest from Docker Hub               |
-| `cargo-binstall-x86_64-unknown-linux-musl.full.tgz` | sha256 of the GitHub release asset (computed locally) |
-| `agentnative-x86_64-unknown-linux-musl.tar.gz`      | sha256 from the release's `sha256sum.txt`             |
+| Asset                                              | Pinned at                                             |
+| -------------------------------------------------- | ----------------------------------------------------- |
+| `cloudflare/sandbox:0.9.4`                         | image digest from Docker Hub                          |
+| `debian:trixie-slim`                               | multi-arch index digest from Docker Hub               |
+| `cargo-binstall-x86_64-unknown-linux-gnu.full.tgz` | sha256 of the GitHub release asset (computed locally) |
+| `agentnative-x86_64-unknown-linux-gnu.tar.gz`      | sha256 from the release's `sha256sum.txt`             |
+| `bun-linux-x64.zip` (bun-v1.3.14)                  | sha256 from the release's `SHASUMS256.txt`            |
+| `uv-x86_64-unknown-linux-gnu.tar.gz` (0.11.15)     | sha256 from the release's `<asset>.sha256` file       |
 
 To bump any pin, resolve the new sha and update both the URL line and the `echo '<sha> ...' | sha256sum -c -`
 verification line. Keep them in sync.
@@ -131,16 +130,23 @@ To resolve the cloudflare/sandbox digest after a version bump:
 
 ```sh
 curl -fsSL "https://hub.docker.com/v2/repositories/cloudflare/sandbox/tags/<tag>/" \
-  | jaq -r '.images[0].digest'
+  | jaq -r '.digest'
 ```
 
+For other GitHub-hosted releases (`agentnative-cli`, `cargo-binstall`, `uv`) the sha256 ships next to the binary
+(`sha256sum.txt`, `<asset>.sha256`). For `bun`, the release page ships a `SHASUMS256.txt`. Always read the upstream
+checksum file rather than computing locally, because the upstream value is what you're trusting.
+
 ## What's NOT in the image (and why)
 
-- **brew.** Linuxbrew on Alpine + musl is not a supported configuration (linuxbrew assumes glibc symbols). User inputs
-  that resolve to `pm: brew` via U4's chain hit U6's `chain_resolved_install_failed` bounce class.
-- **C/C++/Rust toolchains.** `apk add build-base gcc rust` would balloon the image past the size budget AND violate
-  Premise #2 (install-from-binary only). The `cargo install` (compile) path is intentionally absent — cargo-binstall's
-  job is precompiled-only.
+- **brew.** Linuxbrew on Linux takes 20-60 s per install for most formulae; complex formulae exceed the 60 s install +
+  score budget. `brew install <pkg>` user inputs route through the discovery-fallback in
+  `src/worker/score/do.ts:resolveSpec()`: fetch the formula metadata from `formulae.brew.sh`, parse the homepage as a
+  GitHub URL, run the existing `discoverBinary` chain to find an alternative (crates, npm, PyPI, go, direct). Formulae
+  without a peer PM bounce as `install_unsupported pm=brew_only`.
+- **C/C++/Rust toolchains.** `apt-get install build-essential gcc rustc` would balloon the image past the size budget
+  AND violate Premise #2 (install-from-binary only). The `cargo install` (compile) path is intentionally absent because
+  cargo-binstall's job is precompiled-only.
 - **Specific source-only packages.** Anything that requires compilation during `pip install` (no wheel published) will
   fail at install-time. U6 `pip install --only-binary=:all:` makes that explicit.
 
diff --git a/docker/score/Dockerfile b/docker/score/Dockerfile
index 7d14db2..27f90f7 100644
--- a/docker/score/Dockerfile
+++ b/docker/score/Dockerfile
@@ -91,19 +91,13 @@ RUN --mount=type=cache,target=/home/runner/.cache/Homebrew,uid=1000,gid=1000 \
 RUN --mount=type=cache,target=/home/runner/.cache/Homebrew,uid=1000,gid=1000 \
     brew install yq jaq
 
-# ---- The anc binary (glibc, brew-installed from the published tap) ---------
-# Same install path users get on macOS / Linux. The fully-qualified
-# brettdavies/tap/agentnative spec auto-taps the formula (mirrors the
-# oven-sh/bun/bun pattern above). The brewed binary lives in
-# /home/linuxbrew/.linuxbrew/bin and resolves to a release tag's bottle —
-# no local cargo build, no operator-state coupling, no wrong-SHA risk.
-RUN --mount=type=cache,target=/home/runner/.cache/Homebrew,uid=1000,gid=1000 \
-    brew install brettdavies/tap/agentnative \
-    && anc --version
 # uv tool install drops binaries in ~/.local/bin; bun add -g drops them in
 # ~/.bun/bin; cargo-binstall in ~/.cargo/bin. All three need to be on PATH so
 # the post-install `command -v <binary>` check (in install-tools.sh) and the
-# scoring runtime can resolve them.
+# scoring runtime can resolve them. /home/runner/.local/bin sits first so
+# inject-mode's anc binary at that path wins over anything install-tools.sh
+# later brews into /home/linuxbrew/.linuxbrew/bin (notably the registry's
+# own `anc` entry which runs `brew install brettdavies/tap/agentnative`).
 ENV PATH=/home/runner/.local/bin:/home/runner/.bun/bin:/home/runner/.cargo/bin:$PATH
 
 # ---- Pre-bake every registry tool ------------------------------------------
@@ -113,8 +107,19 @@ ENV PATH=/home/runner/.local/bin:/home/runner/.bun/bin:/home/runner/.cargo/bin:$
 # the two and warns on drift (added/removed tools that didn't make it into
 # the install layer).
 #
-# This layer takes the longest (~20-40 minutes first build); Docker caches
-# it so subsequent rebuilds skip ahead unless registry.yaml changed.
+# This layer takes the longest (~5-7 minutes warm, ~20-40 minutes cold).
+# Docker caches it so subsequent rebuilds skip ahead unless registry.yaml
+# or install-tools.sh changed. The anc-install layer below it intentionally
+# does NOT precede this one — putting anc above install-tools meant every
+# `--from-source` build dirtied install-tools too, paying the full reinstall
+# cost. With install-tools BEFORE anc-install, only the anc layer
+# invalidates on cargo rebuilds.
+#
+# Tool freshness is handled at run time by score-anc100.sh's per-tool
+# update step (brew upgrade / uv tool upgrade / bun add -g re-resolution
+# / cargo binstall --force) rather than by busting this layer's cache.
+# That way the heavy install step runs only when registry.yaml changes,
+# and "always test against latest" still holds via the run-time refresh.
 #
 # BuildKit cache mounts on every package manager's download cache: re-runs
 # (when registry.yaml does change) reuse cached bottles/wheels/tarballs and
@@ -133,6 +138,47 @@ RUN --mount=type=cache,target=/home/runner/.cache/Homebrew,uid=1000,gid=1000 \
 # container with no cache). Wall-time cost of redownloading bun packages
 # at build time is small (~5s per package, well under a minute total).
 
+# ---- The anc binary ---------------------------------------------------------
+# Sits AFTER install-tools so cargo-rebuilds (--from-source) only invalidate
+# this layer, not the heavy install-tools layer above. Two modes, selected
+# by build arg ANC_SOURCE (default: brew):
+#
+#   ANC_SOURCE=brew   — brew-install from brettdavies/tap/agentnative. Same
+#                        install path users get on macOS / Linux; the brewed
+#                        binary lives in /home/linuxbrew/.linuxbrew/bin and
+#                        resolves to a release tag's bottle.
+#   ANC_SOURCE=inject — copy a host-built binary from docker/score/inject/anc
+#                        into /home/runner/.local/bin/anc, which is first in
+#                        PATH. Bypasses brew install entirely. Use for
+#                        testing an unreleased anc (feature branch in
+#                        agentnative-cli) without waiting on a tag + bottle.
+#                        docker/score/build.sh --from-source <path-to-cli-repo>
+#                        orchestrates the cargo build + image build in one step.
+#
+# Inject lands at /home/runner/.local/bin/anc because install-tools.sh above
+# may have brewed the registry's `anc` entry, leaving /home/linuxbrew/...bin/anc.
+# /home/runner/.local/bin sits first in PATH, so the inject binary wins.
+ARG ANC_SOURCE=brew
+
+COPY --chown=runner:runner docker/score/inject/ /tmp/anc-inject/
+
+RUN --mount=type=cache,target=/home/runner/.cache/Homebrew,uid=1000,gid=1000 \
+    case "$ANC_SOURCE" in \
+      brew) \
+        brew install brettdavies/tap/agentnative ;; \
+      inject) \
+        if [ ! -f /tmp/anc-inject/anc ]; then \
+          echo "error: ANC_SOURCE=inject but docker/score/inject/anc is missing in build context" >&2; \
+          echo "       run via: docker/score/build.sh --from-source <path-to-cli-repo>" >&2; \
+          exit 1; \
+        fi && \
+        install -m 0755 /tmp/anc-inject/anc /home/runner/.local/bin/anc ;; \
+      *) \
+        echo "error: ANC_SOURCE must be 'brew' or 'inject' (got: $ANC_SOURCE)" >&2; \
+        exit 1 ;; \
+    esac && \
+    anc --version
+
 # ---- Scoring runner --------------------------------------------------------
 COPY --chown=runner:runner docker/score/score-anc100.sh /work/score-anc100.sh
 WORKDIR /work
diff --git a/docker/score/README.md b/docker/score/README.md
index 35837f4..234bde5 100644
--- a/docker/score/README.md
+++ b/docker/score/README.md
@@ -19,13 +19,13 @@ docker/score/
 
 ## Prerequisites (host)
 
-- **Docker Engine + Compose v2.** Engine only — NOT Docker Desktop. Install via `bash docker/score/setup-host.sh`
+- **Docker Engine + Compose v2.** Engine only, NOT Docker Desktop. Install via `bash docker/score/setup-host.sh`
   (Ubuntu) or follow Docker's apt-repo instructions for your distro.
 - **For `nvidia-smi` scoring:** NVIDIA driver + `nvidia-container-toolkit` configured against the Docker daemon. The
   setup-host.sh script handles this if a host GPU is detected. Without it, `nvidia-smi` falls back to `install-missing`
   and the other 99 tools still score.
 
-`anc` is brew-installed inside the image from `brettdavies/tap/agentnative` — no local CLI checkout required.
+`anc` is brew-installed inside the image from `brettdavies/tap/agentnative`. No local CLI checkout required.
 
 ## One-time host setup
 
@@ -40,7 +40,7 @@ bash docker/score/setup-host.sh
 ## Usage
 
 ```bash
-# Build only:
+# Build only (brew-installed anc):
 bash docker/score/build.sh
 
 # Build + score all 100 tools (writes scorecards/*.json on host):
@@ -50,6 +50,31 @@ bash docker/score/build.sh --run
 docker compose -f docker/score/compose.yml run --rm scorer bash
 ```
 
+### Scoring against an unreleased anc
+
+When you need to test a feature branch in agentnative-cli before it gets tagged and bottled, use `--from-source` to
+cargo-build the binary on the host and inject it into the image (skipping brew install):
+
+```bash
+# Build anc from a local CLI checkout + bake into the image:
+bash docker/score/build.sh --from-source ~/dev/agentnative-cli
+
+# Build + run in one step:
+bash docker/score/build.sh --from-source ~/dev/agentnative-cli --run
+```
+
+Inject mode caveats:
+
+- The host must be Linux with cargo + a glibc that can produce a binary the container's Debian trixie base (glibc 2.41)
+  can load. Recent Debian/Ubuntu hosts satisfy this. macOS-built binaries do not work.
+- The injected binary lives in `docker/score/inject/anc` (gitignored). The directory is tracked via `.gitkeep` so the
+  COPY layer always succeeds.
+- Layer cache invalidates when `ANC_SOURCE` changes or when the injected binary content changes (Docker checksums the
+  COPY source). Switching between brew and inject modes re-runs only the anc-install layer; the heavy install-tools
+  layer stays cached.
+- Inject mode skips the brew tap lookup entirely. The image will report `anc --version` matching whatever your local
+  cargo build produced, regardless of what version is currently published to brew.
+
 ## Image structure
 
 The Dockerfile is layered so the v2 Cloudflare Sandbox image (live "paste-a- URL" scoring, post-launch) can extend the
@@ -61,28 +86,30 @@ Layer order:
 1. Base Debian-slim + OS essentials (curl, git, jq, sudo, ca-certificates).
 2. Non-root `runner` user.
 3. Linuxbrew (the heaviest single layer; cached aggressively).
-4. Other package managers: `uv`, `bun`, `cargo-binstall` — all installed via brew so they're prebuilt + cached.
+4. Other package managers: `uv`, `bun`, `cargo-binstall`. All installed via brew, so they're prebuilt + cached.
 5. Tooling for the runner: `yq`, `jaq`.
-6. The `anc` binary, brew-installed from `brettdavies/tap/agentnative` (same install path users get on macOS / Linux).
+6. The `anc` binary, installed via one of two modes selected by the `ANC_SOURCE` build argument: `brew` (default;
+   installs from `brettdavies/tap/agentnative`, same path users get on macOS / Linux) or `inject` (copies a host-built
+   binary from `docker/score/inject/anc`, used by `build.sh --from-source`).
 7. `install-tools.sh` runs once at image build time, reading the build-time registry baked at `/build/registry.yaml` and
-   installing every entry. Failures are logged to `/build/install-log.txt` but do NOT abort the build — tools that fail
-   to install simply end up missing from PATH and the runner records them as `install-missing`.
+   installing every entry. Failures are logged to `/build/install-log.txt` but do NOT abort the build. Tools that fail
+   to install end up missing from PATH and the runner records them as `install-missing`.
 8. `score-anc100.sh` is the entrypoint; iterates the run-time registry at `/work/registry.yaml` (compose bind-mount from
-   the host). If the run-time registry diverges from the baked one, the runner emits a drift warning so the operator
+   the host). If the run-time registry diverges from the baked one, the runner emits a drift warning, so the operator
    knows new tools won't be installed without a rebuild.
 
 ## Failure handling
 
 The runner classifies each registry entry as one of:
 
-- **OK** — installed at build time, scored at run time. Scorecard written to `/work/scorecards/<name>-v<version>.json`
+- **OK**: installed at build time, scored at run time. Scorecard written to `/work/scorecards/<name>-v<version>.json`
   (bind-mounted to host).
-- **install-missing** — install command at build time exited zero but the expected `binary` is not in PATH (or installed
+- **install-missing**: install command at build time exited zero but the expected `binary` is not in PATH (or installed
   cleanly but only as a library, etc.). No scorecard written; the leaderboard renders the registry's existing fallback
   row ("not yet scored").
-- **score-failed** — binary present, but `anc check` produced invalid JSON or exited >1 (real error, not the standard
+- **score-failed**: binary present, but `anc check` produced invalid JSON or exited >1 (real error, not the standard
   "checks failed" exit 1). No scorecard written; entry logged in `/work/scoring-failures.txt`.
-- **skipped** — install method outside the allowed set (e.g., the `included` value used for `nvidia-smi`'s "comes with
+- **skipped**: install method outside the allowed set (e.g., the `included` value used for `nvidia-smi`'s "comes with
   the driver"). The runner records and moves on.
 
 After a successful run, host's `scorecards/` has the new JSONs. Re-run `bun run build` on the host to regenerate
@@ -109,4 +136,4 @@ This image is intentionally a strict subset of the v2 sandbox image. The v2 path
 2. **Registry changed (added/removed/edited a tool):** rerun the same command. The install layer is invalidated for the
    affected tool; brew re-resolves; scoring re-runs.
 3. **Tool released a new version:** the runner's version-extract logic pulls the actually-installed version at score
-   time and writes `<name>-v<NEWVERSION>.json`. The old scorecard file stays on disk — `trash`-clean it manually.
+   time and writes `<name>-v<NEWVERSION>.json`. The old scorecard file stays on disk; `trash`-clean it manually.
diff --git a/docker/score/build.sh b/docker/score/build.sh
index 5777814..743b1a4 100755
--- a/docker/score/build.sh
+++ b/docker/score/build.sh
@@ -1,34 +1,114 @@
 #!/usr/bin/env bash
 # Build the anc-scorer image and (optionally) run it.
 #
-# `anc` is brew-installed inside the image from brettdavies/tap/agentnative
-# (see Dockerfile §"The anc binary"). No local cargo build, no operator-state
-# coupling — the image always uses a published release.
+# Default mode: brew-install anc from brettdavies/tap/agentnative inside
+# the image (Dockerfile §"The anc binary", ANC_SOURCE=brew). The image
+# always uses a published release; no local cargo build, no operator-state
+# coupling.
+#
+# Inject mode: `--from-source <path-to-agentnative-cli>` cargo-builds anc
+# from that repo (release profile), copies the resulting binary into
+# docker/score/inject/anc, and builds the image with ANC_SOURCE=inject
+# so the Dockerfile bypasses brew install and uses the injected binary.
+# Use this to score the registry against an unreleased anc (feature
+# branch, pre-tag dev work) without waiting on a tap formula bump.
 #
 # Steps:
-#   1. Build the docker image via compose.
-#   2. (Optional, with --run) Run the scorer with bind-mounts to write
+#   1. (Inject mode only) cargo build --release in the CLI repo.
+#   2. (Inject mode only) Stage the binary at docker/score/inject/anc.
+#   3. Build the docker image via compose.
+#   4. (Optional, with --run) Run the scorer with bind-mounts to write
 #      scorecards back to the host.
 #
 # Usage (from repo root):
-#   bash docker/score/build.sh           # build only
-#   bash docker/score/build.sh --run     # build + run
+#   bash docker/score/build.sh                                 # brew, build only
+#   bash docker/score/build.sh --run                           # brew, build + run
+#   bash docker/score/build.sh --from-source ~/dev/agentnative-cli         # inject, build only
+#   bash docker/score/build.sh --from-source ~/dev/agentnative-cli --run   # inject, build + run
+#   bash docker/score/build.sh --run -- --only bat,fd          # build + run, partial scoring
+#   bash docker/score/build.sh --run -- --no-update            # build + run, skip per-tool update
+#
+# Arguments after a literal `--` sentinel are passed through to score-anc100.sh
+# inside the container. Use this to flip --only / --no-update without touching
+# the wrapper script.
 
 set -euo pipefail
 
 REPO_ROOT="$(cd "$(dirname "$0")/../.." && pwd)"
+INJECT_DIR="$REPO_ROOT/docker/score/inject"
+
+RUN_AFTER=0
+ANC_SRC=""
+PASSTHROUGH=()
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --run)
+      RUN_AFTER=1
+      shift
+      ;;
+    --from-source)
+      if [[ -z "${2:-}" || "$2" == --* ]]; then
+        echo "error: --from-source requires a path to the agentnative-cli repo" >&2
+        exit 2
+      fi
+      ANC_SRC="$2"
+      shift 2
+      ;;
+    --)
+      shift
+      PASSTHROUGH=("$@")
+      break
+      ;;
+    -h | --help)
+      sed -n '2,/^$/p' "$0" | sed 's/^# \?//'
+      exit 0
+      ;;
+    *)
+      echo "error: unknown flag: $1" >&2
+      echo "       try: $0 --help" >&2
+      exit 2
+      ;;
+  esac
+done
 
 cd "$REPO_ROOT"
 
-# 1. Build the image via compose.
-echo "==> Building anc-scorer image..."
+if [[ -n "$ANC_SRC" ]]; then
+  if [[ ! -d "$ANC_SRC" ]]; then
+    echo "error: --from-source path does not exist or is not a directory: $ANC_SRC" >&2
+    exit 1
+  fi
+  if [[ ! -f "$ANC_SRC/Cargo.toml" ]]; then
+    echo "error: $ANC_SRC does not look like a Cargo workspace (no Cargo.toml at root)" >&2
+    exit 1
+  fi
+
+  echo "==> Building anc from source: $ANC_SRC"
+  (cd "$ANC_SRC" && cargo build --release)
+
+  built_binary="$ANC_SRC/target/release/anc"
+  if [[ ! -f "$built_binary" ]]; then
+    echo "error: cargo build succeeded but $built_binary is missing" >&2
+    exit 1
+  fi
+
+  echo "==> Staging binary into $INJECT_DIR/anc"
+  install -m 0755 "$built_binary" "$INJECT_DIR/anc"
+  echo "    $($INJECT_DIR/anc --version) from $ANC_SRC ($(cd "$ANC_SRC" && git rev-parse --short HEAD))"
+
+  export ANC_SOURCE=inject
+else
+  export ANC_SOURCE=brew
+fi
+
+echo "==> Building anc-scorer image (ANC_SOURCE=$ANC_SOURCE)..."
 docker compose -f docker/score/compose.yml build
 
-# 2. Optionally run.
-if [[ "${1:-}" == "--run" ]]; then
-  echo "==> Running anc-scorer..."
+if [[ $RUN_AFTER -eq 1 ]]; then
+  echo "==> Running anc-scorer${PASSTHROUGH:+ (passthrough: ${PASSTHROUGH[*]})}..."
   mkdir -p docker/score/out
-  docker compose -f docker/score/compose.yml run --rm scorer
+  docker compose -f docker/score/compose.yml run --rm scorer "${PASSTHROUGH[@]}"
 fi
 
 echo "==> done."
diff --git a/docker/score/compose.yml b/docker/score/compose.yml
index ef7ad3a..b923c36 100644
--- a/docker/score/compose.yml
+++ b/docker/score/compose.yml
@@ -4,8 +4,9 @@
 #   docker compose -f docker/score/compose.yml build
 #   docker compose -f docker/score/compose.yml run --rm scorer
 #
-# The wrapper at docker/score/build.sh does both, plus the prerequisite
-# step of building the `anc` binary from the local CLI dev checkout.
+# The wrapper at docker/score/build.sh does both. The image brew-installs
+# the latest `anc` from `brettdavies/tap/agentnative` at build time (see
+# Dockerfile §"The anc binary"). No local CLI checkout, no cargo build.
 #
 # GPU passthrough notes:
 #   The compose file declares NVIDIA GPU access so `nvidia-smi` (one of
@@ -23,6 +24,12 @@ services:
     build:
       context: ../..
       dockerfile: docker/score/Dockerfile
+      args:
+        # Selects how anc lands in the image: `brew` (default) installs from
+        # brettdavies/tap/agentnative; `inject` copies docker/score/inject/anc
+        # in instead, skipping brew. The wrapper (docker/score/build.sh
+        # --from-source <cli-repo>) toggles this via the ANC_SOURCE env var.
+        ANC_SOURCE: ${ANC_SOURCE:-brew}
     image: anc-scorer:latest
     container_name: anc-scorer
 
diff --git a/docker/score/inject/.gitkeep b/docker/score/inject/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/docker/score/score-anc100.sh b/docker/score/score-anc100.sh
index a54e4e4..0689057 100755
--- a/docker/score/score-anc100.sh
+++ b/docker/score/score-anc100.sh
@@ -8,12 +8,24 @@
 # explicitly instead of stopping; (c) writes to a bind-mounted directory so
 # the host filesystem ends up with the scorecards.
 #
+# Each tool gets a per-tool update attempt before its check runs, so a
+# scoring run always tests against the latest version each package manager
+# can resolve (brew upgrade / uv tool upgrade / bun add -g re-resolution).
+# This decouples "tool freshness" from "image build cache" — install-tools
+# in the Dockerfile pre-bakes baseline binaries, and this script upgrades
+# them at run time. Disable with --no-update.
+#
 # Output structure (per-tool):
 #   scored OK    → scorecards/<name>-v<version>.json
 #   install fail → no scorecard file (registry's existing fallback UX renders)
 #   score fail   → no scorecard file + entry added to score-failures.txt
 #
 # Run inside the docker/score/Dockerfile-built image.
+#
+# Flags (passed via `docker compose run --rm scorer <flags>`):
+#   --only NAME1,NAME2     Score only listed registry names; skip the rest.
+#   --no-update            Skip the per-tool update step; score what's baked.
+#   --help                 Print this header and exit.
 
 set -uo pipefail
 
@@ -28,14 +40,51 @@ LOG_DIR=/work/out                        # bind-mounted to host docker/score/out
 SUMMARY=$LOG_DIR/scoring-summary.txt
 FAILURES=$LOG_DIR/scoring-failures.txt
 
+ONLY=""
+DO_UPDATE=1
+
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --only)
+      if [[ -z "${2:-}" || "$2" == --* ]]; then
+        echo "error: --only requires a comma-separated tool list" >&2
+        exit 2
+      fi
+      ONLY="$2"
+      shift 2
+      ;;
+    --no-update)
+      DO_UPDATE=0
+      shift
+      ;;
+    -h | --help)
+      sed -n '2,/^$/p' "$0" | sed 's/^# \?//'
+      exit 0
+      ;;
+    *)
+      echo "error: unknown flag: $1" >&2
+      exit 2
+      ;;
+  esac
+done
+
 mkdir -p "$OUT_DIR" "$LOG_DIR"
 : > "$SUMMARY"
 : > "$FAILURES"
 
 echo "=== anc100 batch scorer ==="
 echo "anc version: $(anc --version)"
+echo "anc path:    $(command -v anc)"
 echo "registry:    $REGISTRY (run-time)"
 echo "output dir:  $OUT_DIR"
+if [[ $DO_UPDATE -eq 1 ]]; then
+  echo "update step: enabled (per-tool, before each check)"
+else
+  echo "update step: disabled (--no-update)"
+fi
+if [[ -n "$ONLY" ]]; then
+  echo "only:        $ONLY"
+fi
 echo
 
 # Drift check: if the run-time registry diverges from the build-time one,
@@ -59,14 +108,25 @@ mapfile -t entries < <(
   # `read -r ... <<<"$line"` collapses consecutive IFS tabs into a single
   # delimiter (observed on bash 5.3.9), which would silently drop the
   # version_extract field whenever audit_profile is unset.
-  yq -r '.tools[] | [.name, .binary, .audit_profile // "-", .version_extract // "-"] | join("\t")' "$REGISTRY"
+  yq -r '.tools[] | [.name, .binary, .audit_profile // "-", .version_extract // "-", .install // "-"] | join("\t")' "$REGISTRY"
 )
 
+# Build the --only allow-set as a comma-bookended string so we can do exact-
+# match lookups via substring containment ("name" matches ",name," only).
+ONLY_SET=""
+if [[ -n "$ONLY" ]]; then
+  ONLY_SET=",${ONLY//[[:space:]]/},"
+fi
+
 total=${#entries[@]}
 scored=0
 install_missing=0
 score_failed=0
 skipped=0
+filtered_out=0
+updated=0
+update_failed=0
+update_skipped=0
 
 # Default extractor: first SemVer-shaped token on the first --version line.
 DEFAULT_VERSION_REGEX='[0-9]+\.[0-9]+(\.[0-9]+)?'
@@ -85,13 +145,96 @@ extract_version() {
   echo "$version"
 }
 
+# Derive the upgrade command for a tool from its registry `install:` field.
+# Echoes the upgrade command on stdout, or "SKIP" if the install method has
+# no upgrade path (OS-bundled, etc.). Exits non-zero if the install method
+# is unrecognized.
+#
+# Special case: the registry's `anc` entry resolves via PATH to whatever's
+# baked into the image (brew in ANC_SOURCE=brew, inject in ANC_SOURCE=inject).
+# In inject mode we explicitly do NOT upgrade anc — the inject binary is the
+# whole point of the run and we must not let brew silently replace it. The
+# scoring loop checks PATH and skips upgrade when the active anc is the
+# inject-mode binary.
+derive_upgrade() {
+  local install=$1
+  case "$install" in
+    "brew install "*)
+      # Pulls the package spec (everything after "brew install ").
+      local pkg="${install#brew install }"
+      echo "brew upgrade $pkg"
+      ;;
+    "uv tool install "*)
+      local pkg="${install#uv tool install }"
+      echo "uv tool upgrade $pkg"
+      ;;
+    "bun add -g "*)
+      # `bun add -g <pkg>` re-resolves to latest each call (Bun's behavior).
+      echo "$install"
+      ;;
+    "included with "*)
+      echo "SKIP"
+      ;;
+    *)
+      return 1
+      ;;
+  esac
+}
+
+# In inject mode, the inject anc lives at /home/runner/.local/bin/anc.
+# When scoring the registry's `anc` entry we must not let `brew upgrade`
+# touch the brew-installed anc and shadow the inject binary (it can't, since
+# /home/runner/.local/bin sits ahead of brew bin in PATH, but the upgrade is
+# pure churn in that case). Detect inject mode by checking which path `anc`
+# resolves to.
+ANC_INJECT_PATH=/home/runner/.local/bin/anc
+ANC_IS_INJECT=0
+if [[ "$(command -v anc)" == "$ANC_INJECT_PATH" ]]; then
+  ANC_IS_INJECT=1
+fi
+
 for line in "${entries[@]}"; do
-  IFS=$'\t' read -r name binary profile extractor <<<"$line"
+  IFS=$'\t' read -r name binary profile extractor install <<<"$line"
   # Decode "-" sentinel back to empty string (see yq pipeline above).
   [[ "$profile" == "-" ]] && profile=""
   [[ "$extractor" == "-" ]] && extractor=""
+  [[ "$install" == "-" ]] && install=""
+
+  if [[ -n "$ONLY_SET" && "$ONLY_SET" != *",$name,"* ]]; then
+    filtered_out=$((filtered_out + 1))
+    continue
+  fi
+
   echo "----- $name ($binary) -----"
 
+  if [[ $DO_UPDATE -eq 1 && -n "$install" ]]; then
+    if [[ "$name" == "anc" && $ANC_IS_INJECT -eq 1 ]]; then
+      echo "  [update] skip — inject anc is the linter, do not replace"
+      update_skipped=$((update_skipped + 1))
+    else
+      upgrade_cmd="$(derive_upgrade "$install" 2>/dev/null || echo UNRECOGNIZED)"
+      case "$upgrade_cmd" in
+        UNRECOGNIZED)
+          echo "  [update] skip — unrecognized install method: $install"
+          update_skipped=$((update_skipped + 1))
+          ;;
+        SKIP)
+          echo "  [update] skip — OS-bundled ($install)"
+          update_skipped=$((update_skipped + 1))
+          ;;
+        *)
+          echo "  [update] $upgrade_cmd"
+          if eval "$upgrade_cmd" >/dev/null 2>&1; then
+            updated=$((updated + 1))
+          else
+            echo "  [update] failed (rc=$?); continuing with installed version"
+            update_failed=$((update_failed + 1))
+          fi
+          ;;
+      esac
+    fi
+  fi
+
   if ! command -v "$binary" >/dev/null 2>&1; then
     echo "  [skip] binary '$binary' not in PATH (install-failed at image-build time)"
     echo "$name install-missing" >> "$FAILURES"
@@ -146,10 +289,16 @@ echo
 echo "==============================="
 {
   echo "anc100 batch-scoring summary ($(date -u +%Y-%m-%dT%H:%M:%SZ))"
-  echo "  scored:          $scored / $total"
+  echo "  scored:          $scored / $((total - filtered_out))"
   echo "  install-missing: $install_missing"
   echo "  score-failed:    $score_failed"
   echo "  skipped:         $skipped"
+  if [[ -n "$ONLY" ]]; then
+    echo "  filtered out:    $filtered_out  (--only $ONLY)"
+  fi
+  if [[ $DO_UPDATE -eq 1 ]]; then
+    echo "  updates:         $updated ok, $update_failed failed, $update_skipped skipped"
+  fi
 } | tee "$SUMMARY"
 echo "==============================="
 
diff --git a/docs/runbooks/live-scoring-analytics.md b/docs/runbooks/live-scoring-analytics.md
new file mode 100644
index 0000000..c53fa09
--- /dev/null
+++ b/docs/runbooks/live-scoring-analytics.md
@@ -0,0 +1,224 @@
+# Live-scoring analytics runbook
+
+Queryable counterpart to the [live-scoring monitoring runbook](./live-scoring-monitoring.md). The monitoring runbook
+covers manual playbooks, `wrangler tail`, kill-switch flips, and incident response. This runbook covers the Workers
+Analytics Engine surface: canonical SQL for usage, performance, errors, and cost-efficiency aggregates.
+
+Two datasets, one per environment:
+
+| Environment | Binding           | Dataset                  |
+| ----------- | ----------------- | ------------------------ |
+| Production  | `SCORE_TELEMETRY` | `anc_live_score_prod`    |
+| Staging     | `SCORE_TELEMETRY` | `anc_live_score_staging` |
+
+Dataset names are configured in `wrangler.jsonc` at top-level (prod) and `env.staging`. The shape isolation is enforced
+by `tests/wrangler-config.test.ts` so a future config refactor cannot accidentally merge the two datasets and pollute
+prod aggregates with staging traffic.
+
+Queries below are written against `anc_live_score_staging`. Swap the dataset name when querying production.
+
+## Field schema
+
+One `writeDataPoint` per `/api/score` request, emitted from `src/worker/score/handler.ts` in the same `try/finally`
+block that emits the `score.tier` console log. The console log is the manual-recovery fallback when Analytics Engine is
+down; this runbook covers the AE side.
+
+| Slot      | Field         | Type                                                               | Notes                                                                                                |
+| --------- | ------------- | ------------------------------------------------------------------ | ---------------------------------------------------------------------------------------------------- |
+| `blob1`   | input kind    | `registry` \| `install-command` \| `github-url` \| `invalid`       | Mapped from `ValidatedInput.kind`; `registry` is the slug-matched case                               |
+| `blob2`   | pm            | `npm` \| `cargo-binstall` \| `pip` \| `uv` \| `bun` \| `go` \| ... | Null when no `InstallSpec` was resolved (curated hit, cache hit, validation reject)                  |
+| `blob3`   | error code    | `ScoreError.code` string                                           | Null on success                                                                                      |
+| `blob4`   | freshness     | `live` \| `cache-hit` \| `registry-hit`                            | Null on error                                                                                        |
+| `blob5`   | resolved step | `0.5-hints` \| `2-releases-asset` \| `3-crates` \| ...             | Set when discovery ran; `registry` for curated hits; null otherwise                                  |
+| `double1` | total ms      | number                                                             | Worker handler wall clock                                                                            |
+| `double2` | install ms    | number                                                             | Sandbox install exec duration; null on non-live paths                                                |
+| `double3` | anc check ms  | number                                                             | Sandbox anc-check exec duration; null on non-live paths                                              |
+| `double4` | status        | number                                                             | HTTP status code                                                                                     |
+| `index1`  | tool          | string                                                             | Tool name or slug; cardinality target ≤10k. AE samples high-cardinality indexes automatically (1:N). |
+
+The slot map is pinned by `tests/score-telemetry.test.ts`. Reordering blobs or doubles silently breaks every query
+below; the regression test fires loudly if the order moves.
+
+## Where to run
+
+Cloudflare dashboard → Workers → Analytics Engine → SQL editor. Paste any query and run. Datasets appear on first write;
+no `wrangler analytics-engine create` step.
+
+Programmatic access via the Cloudflare API is available for the same queries; details in
+[Cloudflare's Analytics Engine SQL API docs](https://developers.cloudflare.com/analytics/analytics-engine/sql-api/).
+
+For the agent-friendly counterpart, see the [monitoring runbook's Agent (MCP) subsections](./live-scoring-monitoring.md)
+which document `mcp__plugin_cloudflare_cloudflare-observability__query_worker_observability` calls inline next to each
+manual check.
+
+## Canonical queries
+
+Each query is named so it can be referred to from incident retrospectives. Time windows default to 24 h; widen or narrow
+as needed.
+
+### Daily request volume by pm
+
+Counts requests grouped by package manager. `null` rows are the requests that never resolved to an `InstallSpec`
+(curated hits, cache hits, validation rejects).
+
+```sql
+SELECT
+  blob2 AS pm,
+  COUNT() AS requests
+FROM anc_live_score_staging
+WHERE timestamp > NOW() - INTERVAL '24' HOUR
+GROUP BY pm
+ORDER BY requests DESC
+FORMAT JSONCompact
+```
+
+### p50 and p99 install + anc check latency by pm
+
+Quantiles on the sandbox-side timings (live paths only — registry hits and cache hits have null timings). Surfaces the
+"which PM is the long tail?" question without leaving the dashboard.
+
+```sql
+SELECT
+  blob2 AS pm,
+  quantileTDigest(0.5)(double2) AS install_p50_ms,
+  quantileTDigest(0.99)(double2) AS install_p99_ms,
+  quantileTDigest(0.5)(double3) AS anc_check_p50_ms,
+  quantileTDigest(0.99)(double3) AS anc_check_p99_ms,
+  COUNT() AS live_runs
+FROM anc_live_score_staging
+WHERE timestamp > NOW() - INTERVAL '24' HOUR
+  AND blob4 = 'live'
+GROUP BY pm
+ORDER BY live_runs DESC
+FORMAT JSONCompact
+```
+
+### Error code distribution
+
+Counts each `ScoreError` variant. Replaces the manual log-query approach the U9 monitoring runbook documented for the
+"Error rate by code" signal; cross-reference the manual path as a fallback for AE outages.
+
+```sql
+SELECT
+  blob3 AS error_code,
+  COUNT() AS hits,
+  AVG(double4) AS avg_status
+FROM anc_live_score_staging
+WHERE timestamp > NOW() - INTERVAL '24' HOUR
+  AND blob3 IS NOT NULL
+GROUP BY error_code
+ORDER BY hits DESC
+FORMAT JSONCompact
+```
+
+Compare against the [monitoring runbook's threshold table](./live-scoring-monitoring.md#threshold-table) for which codes
+are user-driven (expected) versus signal-bearing (investigate).
+
+### Registry-hit-rate: the cost-efficiency signal
+
+Higher is cheaper: registry hits are unmetered (no Turnstile, no rate-limit budget, no DO dispatch). Track this number
+over time; a decline indicates the registry is missing tools the homepage form is being asked to score.
+
+```sql
+SELECT
+  COUNTIf(blob4 = 'registry-hit') / COUNT() AS registry_hit_rate,
+  COUNTIf(blob4 = 'cache-hit') / COUNT() AS cache_hit_rate,
+  COUNTIf(blob4 = 'live') / COUNT() AS live_rate,
+  COUNT() AS total
+FROM anc_live_score_staging
+WHERE timestamp > NOW() - INTERVAL '24' HOUR
+  AND blob3 IS NULL
+FORMAT JSONCompact
+```
+
+The denominator excludes error rows so the rate reflects served traffic, not bounced traffic. Tier mix expectations
+(when healthy: registry-hit + cache-hit dominant; live is the long tail) match the
+[monitoring runbook's tier-mix signal](./live-scoring-monitoring.md#tier-mix).
+
+### Top tools by request count
+
+Sample-corrected count via the `_sample_interval` AE virtual column. `index1` cardinality target is ≤10k; AE
+auto-samples beyond that, and the multiplier corrects for it.
+
+```sql
+SELECT
+  index1 AS tool,
+  SUM(_sample_interval) AS requests
+FROM anc_live_score_staging
+WHERE timestamp > NOW() - INTERVAL '24' HOUR
+  AND index1 IS NOT NULL
+GROUP BY tool
+ORDER BY requests DESC
+LIMIT 25
+FORMAT JSONCompact
+```
+
+### Discovery-step attribution
+
+Which discovery tier resolves live traffic. Answers "should we invest in the README parser, or focus on releases-asset
+hints?"
+
+```sql
+SELECT
+  blob5 AS resolved_step,
+  COUNT() AS resolutions
+FROM anc_live_score_staging
+WHERE timestamp > NOW() - INTERVAL '24' HOUR
+  AND blob4 = 'live'
+  AND blob5 IS NOT NULL
+GROUP BY resolved_step
+ORDER BY resolutions DESC
+FORMAT JSONCompact
+```
+
+`registry` rows in this output are curated registry hits (live paths that fell through to a registered tool via
+post-discovery cache — rare). Discovery-only resolutions land under `0.5-hints` (zero-cost), `2-releases-asset`, `3-brew
+| 3-crates | 3-npm | 3-pypi | 3-go`, or `4-readme-parse`.
+
+### Cache hit composition (cross-source query)
+
+Analytics Engine captures freshness but not the `cache_pre_attempted` / `cache_pre_hit` / `cache_post_attempted` /
+`cache_post_hit` pair from the `score.tier` console log. To break down cache-hit traffic by pre-discovery vs
+post-discovery, join the Analytics Engine counts above against the score.tier log query in the
+[monitoring runbook](./live-scoring-monitoring.md#cache-hit-rate-pre-discovery-vs-post-discovery). Cross-source
+correlation is the explicit cost of keeping the schema slim; widening the Analytics Engine blobs to carry both
+`cache_pre` and `cache_post` booleans is a U10.x consideration if the cross-source dance becomes load-bearing.
+
+## Threshold inheritance
+
+The watch and alarm thresholds from the
+[monitoring runbook's threshold table](./live-scoring-monitoring.md#threshold-table) apply directly to AE aggregates.
+Same numbers, easier to compute. Move thresholds in the monitoring runbook; this runbook inherits.
+
+When a watch fires:
+
+1. Cross-reference the
+   [monitoring runbook's common-failures section](./live-scoring-monitoring.md#common-failures-and-operator-response)
+   for diagnostic commands and resolution paths.
+2. If the symptom doesn't match a documented failure mode, capture the AE query output as evidence and open a follow-up
+   issue against the live-scoring plan.
+
+## AE sampling
+
+Workers Analytics Engine samples high-cardinality indexes automatically. `index1` (tool name or slug) has a target of
+≤10k distinct values; beyond that, the dashboard rows carry a `_sample_interval` multiplier and queries should multiply
+by `SUM(_sample_interval)` to reflect the corrected count (see the "Top tools" query above).
+
+Blob columns are not sampled; they're aggregated faithfully regardless of cardinality. The schema kept low-cardinality
+fields in blobs precisely so the most-queried aggregates (tier mix, error distribution, pm breakdown) stay exact.
+
+If `index1` cardinality grows past 10k in real traffic and sampling becomes a problem (queries get noisier than is
+useful), the cheap escape hatch is to truncate `index1` to a shorter prefix or fold it into a blob. Both are
+config-level changes; deferred until traffic warrants.
+
+## Cross-references
+
+- Field-shape contract: `src/worker/score/telemetry.ts` (`ScoreEventFields`) and `tests/score-telemetry.test.ts` (slot
+  regression).
+- Manual log-query playbook: [`docs/runbooks/live-scoring-monitoring.md`](./live-scoring-monitoring.md).
+- Cost guardrails (rate limits, kill switch, Budget Alerts, deferred auto-kill):
+  [`RELEASES.md` § Cost guardrails](../../RELEASES.md#cost-guardrails).
+- Wrangler binding declarations: `wrangler.jsonc` (top-level + `env.staging`).
+- Gate-ordering rationale (why registry + cache hits are unmetered):
+  [`docs/solutions/architecture-patterns/cf-worker-gate-ordering-before-cost-bearing-outbounds-2026-05-20.md`](../solutions/architecture-patterns/cf-worker-gate-ordering-before-cost-bearing-outbounds-2026-05-20.md).
+- Plan unit: `docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md` (U10 deliverable 3).
diff --git a/docs/runbooks/live-scoring-monitoring.md b/docs/runbooks/live-scoring-monitoring.md
new file mode 100644
index 0000000..2187b8e
--- /dev/null
+++ b/docs/runbooks/live-scoring-monitoring.md
@@ -0,0 +1,375 @@
+# Live-scoring monitoring runbook
+
+Operator playbook for `/api/score`. Manual + log-query workflow against the Wrangler observability binding; the primary
+telemetry surface is the `score.tier` log line that `src/worker/score/handler.ts` emits once per request, with a
+queryable Workers Analytics Engine counterpart (`SCORE_TELEMETRY` binding → `anc_live_score_prod` /
+`anc_live_score_staging`) introduced in U10. The analytics runbook covers the AE SQL playbook end-to-end; this runbook
+covers manual incident response, kill-switch flips, and the operator-facing diagnostic recipes.
+
+For canonical Analytics Engine SQL (daily volume, p50/p99 latency, error distribution, registry-hit-rate), see the
+[live-scoring analytics runbook](./live-scoring-analytics.md). Threshold values in this runbook apply identically when
+computed via AE SQL.
+
+The runbook stays in staging-mode prose until live scoring promotes to anc.dev. Production-specific commands land here
+when production promotion happens.
+
+## Telemetry contract
+
+`src/worker/score/handler.ts` calls `emitTelemetry()` in a `try/finally` so every code path produces one log line,
+regardless of which tier served the response or which error short-circuited the pipeline. Schema:
+
+```json
+{
+  "scope": "score.tier",
+  "tier": "curated | cache_pre | cache_post | live | error_<code>",
+  "cache_pre_attempted": true,
+  "cache_pre_hit": false,
+  "cache_post_attempted": true,
+  "cache_post_hit": false,
+  "binary": "ripgrep | null",
+  "input_kind": "slug | install-command | github-url | null"
+}
+```
+
+`tier` values:
+
+- `curated`: registry-fast-path hit (in-memory hashmap, no I/O, unmetered).
+- `cache_pre`: step-2 R2 cache hit; binary derivable from input.
+- `cache_post`: step-6.5 R2 cache hit; binary discovered via fan-out, then re-checked against cache.
+- `live`: Sandbox DO dispatched, container spawned, real `anc` check.
+- `error_<code>`: terminal error. `<code>` is one of the 19 `ScoreError` variants from
+  `src/worker/score/response-shape.ts`.
+
+`cache_pre_*` and `cache_post_*` are independent: both can be `true` in a single request that missed pre-discovery
+cache, fanned out, then hit the post-discovery cache. The pair tells you whether the cache shape (currently keyed on
+`binary`) is doing its job before vs after discovery.
+
+`binary` is `null` until the pipeline resolves an `InstallSpec`. `input_kind` records how the input parser classified
+the request, useful when filtering by request shape.
+
+## Querying the telemetry
+
+Two complementary surfaces.
+
+Live tail (debug a current incident):
+
+```bash
+bun x wrangler tail --env staging --format json --search 'score.tier'
+```
+
+Historical search (post-mortem, threshold checks): Cloudflare dashboard → Workers → `agentnative-site-staging` → Logs
+(observability) → filter `scope:"score.tier"`. `head_sampling_rate` is `1.0`, so every request is captured.
+
+The smoke script (`scripts/smoke-api-score.sh <base-url>`) exercises the registry-fast-path in CI and locally. Use it as
+a one-shot reachability check before deeper investigation.
+
+## What to watch
+
+Five signals, ordered by how often a watch will look at them.
+
+### Tier mix
+
+Percentage of requests resolved by each tier over a rolling window (1 h for incident triage, 24 h for soak posture).
+Healthy mix has `curated + cache_pre + cache_post` dominant; `live` is the long tail and `error_*` is bounded.
+
+The R6 unmetered contract (curated + cache hits bypass kill-switch, Turnstile, and rate-limit) means `curated` and
+`cache_pre` are the cheapest tiers per request and should carry most traffic. See
+`docs/solutions/architecture-patterns/cf-worker-gate-ordering-before-cost-bearing-outbounds-2026-05-20.md` for the
+rationale.
+
+Watch: `live` climbing above ~30% sustained. Alarm: `live` above ~50% sustained over an hour. Either signals broken
+cache writes, registry coverage gaps, or an abuse pattern fanning out to uncached binaries.
+
+**AE (primary):** Use the
+[registry-hit-rate query in the analytics runbook](./live-scoring-analytics.md#registry-hit-rate--the-cost-efficiency-signal)
+for sustained tier-mix monitoring; the same row also surfaces `cache_hit_rate` and `live_rate` for the same window.
+
+**Manual log-query (fallback when AE is down):** filter the Workers Logs dashboard by `scope:"score.tier"` and bucket
+`tier` values over the window of interest.
+
+**Agent (MCP):** `mcp__plugin_cloudflare_cloudflare-observability__query_worker_observability` with filter
+`scope:"score.tier"` and a `groupBy: ["tier"]` aggregation returns the same buckets without the dashboard round-trip.
+
+### Error rate by code
+
+Group `error_<code>` lines and break down by `code`. Some codes are user-driven and expected:
+
+- `chain_no_resolve`: input didn't match registry, hints, or GitHub probe. User mis-typed, or the binary genuinely isn't
+  in any registry.
+- `invalid_url`, `non_https_url`, `non_github_host`, `invalid_url_path`, `unrecognized_input`,
+  `unparseable_install_command`: input parser rejections. Bounded by what the form lets through.
+
+The signal-bearing codes:
+
+- `turnstile_failed` (HTTP 400): siteverify rejected the token. A spike means either bot-defense degraded (Cloudflare
+  siteverify outage) or the homepage form is dispatching tokens incorrectly.
+- `rate_limited` (HTTP 429): caller burned through 10 distinct-tool requests per session per minute (`SCORE_LIMITER`) or
+  30 requests per minute per IP (`SCORE_LIMITER_IP`). Spike is abuse or a hot user.
+- `service_misconfigured` (HTTP 500): a required secret (`TURNSTILE_SECRET`, `SESSION_HMAC_SECRET`) is missing. Always
+  operator-actionable; investigate immediately.
+- `incomplete_response_contract` (HTTP 500): handler produced a payload missing the response triad. Drift-guard for a
+  future regression; should be zero.
+- `scoring_disabled` (HTTP 503): kill-switch is on. Expected during incidents; unexpected otherwise.
+- `timeout` (HTTP 504): sandbox install or scoring exceeded the budget. Sandbox cold-start drag or a stuck container.
+- `chain_resolved_install_failed`, `chain_resolved_no_binary_produced` (HTTP 502): the install spec resolved but the
+  sandbox couldn't produce a usable binary. Image regression or upstream package outage.
+- `discovery_redirect_loop` (HTTP 502): the discovery chain cycled. Worth investigating individually.
+
+HTTP-status mapping is canonical in `src/worker/score/response-shape.ts:statusForError`.
+
+**AE (primary):** Use the
+[error-code-distribution query in the analytics runbook](./live-scoring-analytics.md#error-code-distribution); it groups
+`blob3` (error code) and counts. The same query carries `avg_status` so a status drift (e.g., a code that should be 404
+returning 500) surfaces inline.
+
+**Manual log-query (fallback when AE is down):** filter Workers Logs by `scope:"score.tier"` and bucket on lines where
+`tier` starts with `error_`. Slower than AE but covers the same fields.
+
+**Agent (MCP):** `mcp__plugin_cloudflare_cloudflare-observability__query_worker_observability` with filter
+`scope:"score.tier" AND tier:error_*` and a `groupBy: ["tier"]` aggregation returns the same per-code breakdown.
+
+### Cache hit rate, pre-discovery vs post-discovery
+
+From `cache_pre_attempted` / `cache_pre_hit` / `cache_post_attempted` / `cache_post_hit`. Compute:
+
+- pre-hit rate = `count(cache_pre_hit=true) / count(cache_pre_attempted=true)`
+- post-hit rate = `count(cache_post_hit=true) / count(cache_post_attempted=true)`
+
+Pre-hit rate should be substantially higher than post-hit rate in a healthy state, because pre-discovery hits are the
+cheapest path. If post-hit rate consistently dominates, the cache key shape (`scores/<binary>/<SPEC_VERSION>.json`,
+keyed on the discovered binary) isn't catching round-1 traffic. The reshape (key on owner/repo) is a future planning
+call; document the observation here and surface it in the next planning pass rather than acting on each spike.
+
+**Cross-source dependency:** Analytics Engine carries only `freshness` (`live` / `cache-hit` / `registry-hit`); it does
+NOT carry the `cache_pre_*` / `cache_post_*` booleans, which stay in the `score.tier` console log. To break down
+cache-hit traffic by pre-discovery vs post-discovery, run the manual log-query above and combine the result with the
+Analytics Engine freshness aggregate. See the
+[cache-hit composition cross-source note in the analytics runbook](./live-scoring-analytics.md#cache-hit-composition-cross-source-query).
+
+### GitHub unauth quota proximity
+
+No direct telemetry. Discovery hits `api.github.com` at 60 requests/hour per IP, but the IP is Cloudflare's shared
+egress pool, so the quota is consumed across every Worker tenant. Detection is reactive: `chain_no_resolve` rates climb
+for inputs that previously resolved, and `wrangler tail` shows 403s from `api.github.com` in the request trace.
+
+Mitigation today is "wait for the hourly window to reset"; curated and cache tiers stay healthy throughout the outage
+because of the R6 unmetered contract. Chronic exhaustion is a U10 trigger to add an authenticated GitHub PAT to
+discovery.
+
+### Sandbox cold-start and run latency
+
+Not in `score.tier` directly. Pull from the observability `duration` field on requests where `tier === 'live'`.
+Acceptable bounds while the staging soak continues:
+
+- median sandbox-served request: 5 to 15 s
+- p99: under 60 s (the install + score timeout budget)
+
+Above p99 budget means timeouts will follow. Check Cloudflare Containers dashboard for instance count and recent image
+churn before assuming the worst.
+
+## Threshold table
+
+Watch thresholds are informational (look closer). Alarm thresholds page the operator. Conservative on the alarm side;
+easier to tighten later than to recover from a missed incident.
+
+| Signal                              | Watch                              | Alarm                              | Where to look                                                  |
+| ----------------------------------- | ---------------------------------- | ---------------------------------- | -------------------------------------------------------------- |
+| `tier === 'live'` share             | > 30% over 1 h                     | > 50% over 1 h                     | Tier mix query                                                 |
+| `error_turnstile_failed` rate       | > 5% of POSTs over 15 min          | > 20% of POSTs over 15 min         | Error breakdown                                                |
+| `error_rate_limited` rate           | > 2% of POSTs over 15 min          | > 10% of POSTs over 15 min         | Error breakdown + per-IP attribution in dashboard              |
+| `error_service_misconfigured` count | any                                | any                                | Error breakdown; always investigate                            |
+| `error_incomplete_response`         | any                                | any                                | Treat as a regression; reproduce + open issue                  |
+| `error_timeout` rate                | > 1% of `tier === 'live'`          | > 5% of `tier === 'live'`          | Sandbox + container dashboard                                  |
+| `error_chain_resolved_*` rate       | > 1% of `tier === 'live'`          | > 5% of `tier === 'live'`          | Compare against recent image deploys                           |
+| Pre-hit rate                        | < 40% with `cache_pre_attempted>0` | < 20% with `cache_pre_attempted>0` | Cache shape candidate (defer action to U10)                    |
+| GitHub 403 rate on outbound         | any                                | sustained > 5 min                  | `wrangler tail`, `api.github.com` Status                       |
+| Sandbox p99 duration                | > 45 s                             | > 60 s                             | Observability `duration` filtered to `tier === 'live'`         |
+| Active sandbox instance count       | -                                  | At configured ceiling for > 5 min  | Containers dashboard (`max_instances: 3` per `wrangler.jsonc`) |
+
+Rate-limit + Turnstile alarms presume traffic is non-trivial. Below ~50 requests in the window, treat ratios as noisy
+and rely on absolute counts.
+
+## Common failures and operator response
+
+Six runbook entries. Each names a symptom, the diagnostic command, and the resolution. Stop at the first match; the list
+is not a tree.
+
+### Kill-switch flip (manual incident response)
+
+Symptom: not a failure mode; the operator's tool for stopping all `live` traffic when something downstream is on fire.
+
+```bash
+# Flip ON. Subsequent /api/score live requests return 503 with Retry-After: 3600.
+bun x wrangler kv key put --binding=SCORE_KV --env staging scoring_disabled true
+
+# Flip OFF.
+bun x wrangler kv key delete --binding=SCORE_KV --env staging scoring_disabled
+```
+
+The Worker caches the flag in-isolate for 30 s, and KV propagates globally within ~60 s, so allow up to a minute for the
+flip to land everywhere. `curated` and cache tiers keep serving because they short-circuit ahead of the gate block (R6
+unmetered contract).
+
+The U10 auto-kill cron will flip the same flag based on Budget Alert state. Manual flip stays available as an override.
+
+### Turnstile siteverify outage
+
+Symptom: `error_turnstile_failed` rate spikes above the alarm threshold while site traffic looks otherwise normal.
+
+Diagnostic:
+
+```bash
+# Confirm siteverify reachability from your laptop (does not consume metered budget).
+curl -i https://challenges.cloudflare.com/turnstile/v0/siteverify -d 'secret=1x0000000000000000000000000000000AA&response=x'
+
+# Check Cloudflare Status for ongoing incidents.
+open https://www.cloudflarestatus.com/
+```
+
+Resolution: nothing operator-actionable. The Worker fail-closes on siteverify failures (intentional). Wait for
+Cloudflare to restore. `curated` and cache tiers stay healthy throughout.
+
+If siteverify is healthy but `turnstile_failed` persists, suspect the homepage form: check the dispatched token shape
+against the staging form's network tab and confirm `TURNSTILE_SITEKEY` matches `TURNSTILE_SECRET` (staging uses the
+always-passes test pair).
+
+### GitHub API exhaustion (unauthenticated quota)
+
+Symptom: `chain_no_resolve` rate climbs for inputs that previously resolved. `wrangler tail` shows 403s from
+`api.github.com`.
+
+Diagnostic:
+
+```bash
+# Live tail for outbound 403s.
+bun x wrangler tail --env staging --search 'api.github.com'
+```
+
+Resolution: no operator action; wait for the hourly window to reset. Curated entries and cached binaries keep serving
+because of the R6 unmetered contract.
+
+If exhaustion is chronic (multiple windows in a row), escalate to U10 to add an authenticated GitHub PAT for discovery.
+
+### Sandbox crash or cold-start timeout
+
+Symptom: `error_timeout` or `error_chain_resolved_install_failed` rates spike for inputs that previously worked; sandbox
+p99 duration crosses the alarm.
+
+Diagnostic:
+
+```bash
+# Live deploy history.
+bun x wrangler deployments list --env staging | head -20
+
+# Container instance health (Cloudflare dashboard).
+open https://dash.cloudflare.com/?to=/:account/workers/services/view/agentnative-site-staging/containers
+```
+
+Resolution:
+
+1. If a recent Worker version coincides with the spike, roll it back: `bun x wrangler rollback <version-id> --env
+   staging`. The DO migration `v1` stays; rollback only reverts code + bindings.
+2. If the image was bumped recently, inspect the image build for missing dependencies (matches the U6
+   `python:3.12-slim-trixie` sdist allowlist pattern); deploy a corrected image via the standard `wrangler containers
+   build -p` → `wrangler deploy` flow documented in
+   [`RELEASES.md` § Sandbox image releases](../../RELEASES.md#sandbox-image-releases).
+3. If neither, raise the kill-switch (see above) and investigate offline.
+
+### R2 cache failure
+
+Symptom: `cache_pre_attempted=true` with `cache_pre_hit=false` for inputs that should be cached (e.g., a slug scored
+within the last 7 days). Pre-hit rate craters across all inputs.
+
+Diagnostic:
+
+```bash
+# Confirm R2 is reachable from your laptop and the bucket has objects.
+bun x wrangler r2 object list anc-score-cache-staging --prefix=scores/ | head -20
+
+# Confirm the lifecycle rule is intact.
+bun x wrangler r2 bucket lifecycle list anc-score-cache-staging
+```
+
+Resolution: cache reads and writes are best-effort in `src/worker/score/cache.ts`; a missing cache makes `live` runs
+more frequent (more expensive) but doesn't break the route. If the R2 binding is genuinely broken, the lifecycle rule
+disappeared, or the bucket is gone, restore via the recipes in
+[`RELEASES.md` § R2 score-cache lifecycle](../../RELEASES.md#r2-score-cache-lifecycle).
+
+### Service misconfigured
+
+Symptom: `error_service_misconfigured` appears at all.
+
+Diagnostic + resolution:
+
+```bash
+# List wrangler secrets to confirm what's bound.
+bun x wrangler secret list --env staging
+
+# Re-set whichever is missing. The handler fail-closes when either is absent.
+bun x wrangler secret put TURNSTILE_SECRET --env staging
+bun x wrangler secret put SESSION_HMAC_SECRET --env staging
+```
+
+Both secrets are required for `/api/score` to mint sessions and verify tokens. Treat any occurrence of this error as an
+immediate page; the route serves only `curated` + cache tiers (the unmetered ones) while it persists.
+
+## Cost-watch checklist
+
+Quick hand-check before each staging deploy, and any time a tier mix or error alarm fires. The numbers below are
+reference points, not contracts; tune as soak data arrives.
+
+- Wrangler dashboard request count for the last 24 h. Baseline expected: low (this is staging, gated by CF Access). A
+  jump means traffic shape changed or a test harness ran loose.
+- Active sandbox container instance count. Ceiling is `max_instances: 3` per `wrangler.jsonc`. Sustained at-ceiling
+  means traffic is exceeding the container budget; the kill-switch flip is the safety valve.
+- R2 storage size for `anc-score-cache-staging` `scores/` prefix. Should plateau because of the 7-day lifecycle. Steady
+  growth means lifecycle is broken; see the R2 cache failure entry above.
+- Outbound GitHub request rate. Proxy: count `tier === 'live'` lines and multiply by ~5 (each live run touches GitHub
+  Releases, the repo metadata endpoint, and a few README/asset fetches).
+- Turnstile siteverify rate. One per session mint; should track `tier === 'live'` plus `tier === 'cache_post'` roughly.
+
+Production cost watch is documented in [`RELEASES.md` § Cost guardrails](../../RELEASES.md#cost-guardrails). Budget
+Alerts at $5 / $25 / $100 live in the Cloudflare dashboard and trigger ahead of the kill-switch, so the manual checklist
+is a backstop, not the primary signal.
+
+## Cross-references
+
+- Telemetry emitter: `src/worker/score/handler.ts` (the `Telemetry` type + `emitTelemetry()`).
+- Error union + HTTP status mapping: `src/worker/score/response-shape.ts` (`statusForError`).
+- Kill switch: `src/worker/score/kill-switch.ts` + the `SCORE_KV` binding in `wrangler.jsonc`.
+- Cache shape: `src/worker/score/cache.ts` (key `scores/<binary>/<SPEC_VERSION>.json`).
+- Gate-ordering rationale (why R6 unmetered tier is safe under abuse):
+  `docs/solutions/architecture-patterns/cf-worker-gate-ordering-before-cost-bearing-outbounds-2026-05-20.md`.
+- Release procedure:
+  [`RELEASES.md` § Live-scoring (v3) release procedure](../../RELEASES.md#live-scoring-v3-release-procedure).
+- Post-deploy smoke: [`RELEASES.md` § Post-deploy smoke](../../RELEASES.md#post-deploy-smoke) and
+  `scripts/smoke-api-score.sh`.
+- Sandbox image lifecycle and DO migrations:
+  [`RELEASES-RATIONALE.md` § Sandbox image releases](../../RELEASES-RATIONALE.md#sandbox-image-releases) and
+  [§ DO migrations are one-way walls](../../RELEASES-RATIONALE.md#do-migrations-are-one-way-walls).
+- Plan unit: `docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md` (U9 deliverable 5).
+
+## Still deferred
+
+Out of scope for U10, named here so the operator knows where the next gap is:
+
+- **Auto-kill cron** (U10.1). Reads an Analytics Engine aggregate for rolling 24h request count and flips
+  `scoring_disabled` automatically when the budget breaches the configured ceiling. Threshold needs real traffic data to
+  set; deferred until staging soak produces enough signal.
+- **Production-side procedure block** in this runbook (commands, thresholds, escalation paths against anc.dev rather
+  than the staging Worker). Lands when live scoring promotes to anc.dev.
+- **Authenticated GitHub PAT for discovery**, if chronic unauth-quota exhaustion has materialised by then.
+
+### Agent-deterministic checks
+
+**Decision (U10):** Path B (MCP queries) shipped. Each manual check above carries an inline **Agent (MCP)** subsection
+naming the `mcp__plugin_cloudflare_cloudflare-observability__query_worker_observability` (or related cloudflare-bindings
+MCP) call. No `scripts/monitoring/` wrapper scripts were added; the existing wrangler CLI + dashboard surface stays the
+operator path, and agents reach the same data through native MCP tools.
+
+Rationale: agents already have Cloudflare MCP tools; documenting which call to make for which check costs zero new code
+surface and zero ongoing maintenance. The alternative (shell scripts in `scripts/monitoring/`) was deferred because its
+unique value is GitHub Actions cron reach, which U10's acceptance bar doesn't require. If automated CI monitoring
+becomes a need later, scripts can land as a separate change without unwinding the MCP docs.
+
+Until U10 lands, the runbook above is the operator's only playbook.
diff --git a/package.json b/package.json
index 9cd4392..29287d2 100644
--- a/package.json
+++ b/package.json
@@ -32,6 +32,7 @@
     "wrangler": "^4.81.0"
   },
   "dependencies": {
+    "@cloudflare/sandbox": "0.9.2",
     "@shikijs/rehype": "^4.0.2",
     "accepts": "^1.3.8",
     "badge-maker": "^5.0.2",
diff --git a/playwright.config.ts b/playwright.config.ts
index a3ee7d2..c769ad6 100644
--- a/playwright.config.ts
+++ b/playwright.config.ts
@@ -48,7 +48,11 @@ export default defineConfig({
     {
       name: 'chromium',
       use: { ...devices['Desktop Chrome'] },
-      testIgnore: /skill\.e2e\.ts/,
+      // Both opt-in live projects (skill + homepage-score-live) are
+      // excluded from the default suite — they hit real network endpoints
+      // (github.com clone hosts, the staging Worker) that the deep-check
+      // daily schedule shouldn't depend on.
+      testIgnore: [/skill\.e2e\.ts/, /homepage-score-live\.e2e\.ts/],
     },
     { name: 'mobile-android', use: { ...devices['Pixel 7'] }, testMatch: /flows\.e2e\.ts/ },
     { name: 'mobile-ios', use: { ...devices['iPhone 13'] }, testMatch: /flows\.e2e\.ts/ },
@@ -60,9 +64,28 @@ export default defineConfig({
       // Live `git clone` against github.com over the network — give it room.
       timeout: 60_000,
     },
+    {
+      name: 'homepage-score-live',
+      // Live staging Worker. Set ANC_STAGING_BASE_URL before invoking;
+      // see tests/e2e/homepage-score-live.e2e.ts for full env contract.
+      // Excluded from the default suite; run with --project=homepage-score-live.
+      use: { ...devices['Desktop Chrome'] },
+      testMatch: /homepage-score-live\.e2e\.ts/,
+      // Real Sandbox container cold starts and Turnstile siteverify
+      // round-trips push the per-test budget past Playwright's default.
+      timeout: 120_000,
+    },
   ],
   webServer: {
-    command: 'bun run build && bun x wrangler dev --local --port ' + PORT,
+    // --env staging: the staging-pinned Sandbox image is the one we keep
+    // locally; the top-level prod image is rotated less frequently and
+    // often isn't in the dev Docker cache, which makes `wrangler dev
+    // --local` (no --env) fail with a misleading "container Sandbox does
+    // not expose any ports" error during prepareContainerImagesForDev.
+    // Using --env staging also gives the homepage-score E2E suite a real
+    // TURNSTILE_SITEKEY var to substitute into the meta tag — matches
+    // staging behavior directly.
+    command: 'bun run build && bun x wrangler dev --local --env staging --port ' + PORT,
     url: BASE_URL,
     reuseExistingServer: !process.env.CI,
     timeout: 120_000,
diff --git a/scripts/SYNCS.md b/scripts/SYNCS.md
index db7d26f..82d4129 100644
--- a/scripts/SYNCS.md
+++ b/scripts/SYNCS.md
@@ -2,8 +2,8 @@
 
 How CLI / spec / skill data flows into this repo, and how site artifacts flow out.
 
-This is the source of truth for sync mechanisms — the scripts, the directions, the drift checks, and what is *planned
-but not built*. Update this file whenever a sync script, workflow, endpoint, or vendored artifact changes shape.
+This is the source of truth for sync mechanisms: the scripts, the directions, the drift checks, and what is *planned but
+not built*. Update this file whenever a sync script, workflow, endpoint, or vendored artifact changes shape.
 
 Existing top-level docs cover adjacent concerns but none give a single map:
 
@@ -42,22 +42,22 @@ flowchart LR
     cf -- "anc.dev endpoints<br/>(serves the rendered site)" --> agentHosts
 ```
 
-## Upstream — data flowing INTO this repo
+## Upstream: data flowing INTO this repo
 
-| Source                                                                                                                                                                                                                                               | Mechanism                                                                                                                                                                                                                           | What's synced                                                                                                           | Trigger / cadence                                                                                                                                                                                                                          | Drift check                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     |
-| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `brettdavies/agentnative-cli` `coverage/matrix.json`                                                                                                                                                                                                 | `scripts/sync-coverage-matrix.sh` (manual `cp` from `$ANC_ROOT/coverage/matrix.json`)                                                                                                                                               | → `src/data/coverage-matrix.json`                                                                                       | After CLI bumps the matrix (new checks, registry changes)                                                                                                                                                                                  | CLI's CI enforces `anc generate coverage-matrix --check` against the committed CLI artifact. Site trusts the synced copy; no site-side `--check` mode. Resync is manual; `git diff` after sync is the review surface.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
-| `brettdavies/agentnative` (spec) `principles/p*-*.md` + `VERSION` + `CHANGELOG.md`                                                                                                                                                                   | `scripts/sync-spec.sh` (manual; remote-first via `SPEC_REMOTE_URL`, falls back to local `SPEC_ROOT`; auto-picks latest v* tag; extracts via `git show "$tag:<path>" >dest`)                                                         | → `src/data/spec/{VERSION,CHANGELOG.md,principles/p*-*.md}` (`principles/AGENTS.md` filtered out — spec-side internal)  | After a spec release. Spec's `repository_dispatch:spec-release` already fires here on tag publish.                                                                                                                                         | None automated on this side (consumer-side handler that auto-PRs the resync is tracked as follow-up). Spec repo's `scripts/hooks/pre-push` enforces source-side correctness. `git diff src/data/spec/` after sync is the review surface. `src/data/spec/README.md` documents the workflow.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
-| `brettdavies/agentnative` (spec) prose-check tooling: `BRAND.md`, `styles/brand/*.yml` + `README.md`, `styles/config/vocabularies/brand/{accept,reject}.txt`, `scripts/generate-pack-readme.mjs`                                                     | `scripts/sync-prose-tooling.sh` (manual; remote-first / local-fallback like `sync-spec.sh`, but tracks `main` HEAD instead of the latest v* tag because prose tooling is not contract; extracts via `git show "main:<path>" >dest`) | → repo-rooted: `BRAND.md`, `styles/brand/`, `styles/config/vocabularies/brand/`, `scripts/generate-pack-readme.mjs`     | After spec's `main` advances with changes touching the prose-check stack. Separate sync clock from `sync-spec.sh` because prose tooling and the principles/contract release on different cadences and the tooling has no release ceremony. | None automated on this side. Sync-script atomicity is the integrity guarantee: brand `*.yml` AND its `README.md` come from the same `main` HEAD SHA, so no downstream regeneration / drift surface. `git diff` after sync is the review surface. Idempotent at a fixed `main` HEAD SHA: re-running produces no diff until upstream `main` moves. **Consumer-owned (un-vendored 2026-05-13):** `scripts/prose-check.sh` is no longer vendored by this script — the upstream copy kept clobbering the SITE-LOCAL DIVERGENCE block (consumer-specific path exclusions and LT denylist additions). Universal pipeline changes (new check stage, LT URL change, severity routing) now require coordinated PRs across all four channel repos (spec / site / cli / skill). Long-term fix is the sidecar-config migration tracked at `agentnative-spec/.context/compound-engineering/todos/`; once shipped, vendoring can resume with universal logic vendored and consumer config in a sidecar file. See `scripts/prose-check.sh`'s CONSUMER-OWNED header for context. |
-| `docker/score/` image — pre-installs the full ANC 100 toolset (`anc` + 96 scored binaries) inside a reproducible Ubuntu container; iterates `registry.yaml` and runs `anc check --command <bin> [--audit-profile <category>] --output json` for each | `bash docker/score/build.sh --run` (builds `anc` from local cli checkout, builds image, runs `score-anc100.sh` inside container with bind-mounted `scorecards/` + `out/` dirs)                                                      | → `scorecards/<name>-v<version>.json` (96 files) + `docker/score/out/score-failures.txt` for any install/score failures | After a new `anc` release, after registry changes, or to refresh the full leaderboard                                                                                                                                                      | Build-time schema 0.5 invariant validation in `src/build/scorecards.mjs`; auto-discovery picks the highest-versioned scorecard per slug, silently superseding stale ones. Filename's `-v<version>` suffix is the version anchor (registry no longer carries `version:` per entry post-U4). The container is the source of truth — host-side ad-hoc scoring is deprecated.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       |
+| Source                                                                                                                                                                                                                                              | Mechanism                                                                                                                                                                                                                                   | What's synced                                                                                                           | Trigger / cadence                                                                                                                                                                                                                          | Drift check                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           |
+| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `brettdavies/agentnative-cli` `coverage/matrix.json`                                                                                                                                                                                                | `scripts/sync-coverage-matrix.sh` (manual `cp` from `$ANC_ROOT/coverage/matrix.json`)                                                                                                                                                       | → `src/data/coverage-matrix.json`                                                                                       | After CLI bumps the matrix (new checks, registry changes)                                                                                                                                                                                  | CLI's CI enforces `anc generate coverage-matrix --check` against the committed CLI artifact. Site trusts the synced copy; no site-side `--check` mode. Resync is manual; `git diff` after sync is the review surface.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
+| `brettdavies/agentnative` (spec) `principles/p*-*.md` + `VERSION` + `CHANGELOG.md`                                                                                                                                                                  | `scripts/sync-spec.sh` (manual; remote-first via `SPEC_REMOTE_URL`, falls back to local `SPEC_ROOT`; auto-picks latest v* tag; extracts via `git show "$tag:<path>" >dest`)                                                                 | → `src/data/spec/{VERSION,CHANGELOG.md,principles/p*-*.md}` (`principles/AGENTS.md` filtered out, spec-side internal)   | After a spec release. Spec's `repository_dispatch:spec-release` already fires here on tag publish.                                                                                                                                         | None automated on this side (consumer-side handler that auto-PRs the resync is tracked as follow-up). Spec repo's `scripts/hooks/pre-push` enforces source-side correctness. `git diff src/data/spec/` after sync is the review surface. `src/data/spec/README.md` documents the workflow.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
+| `brettdavies/agentnative` (spec) prose-check tooling: `BRAND.md`, `styles/brand/*.yml` + `README.md`, `styles/config/vocabularies/brand/{accept,reject}.txt`, `scripts/generate-pack-readme.mjs`                                                    | `scripts/sync-prose-tooling.sh` (manual; remote-first / local-fallback like `sync-spec.sh`, but tracks `main` HEAD instead of the latest v* tag because prose tooling is not contract; extracts via `git show "main:<path>" >dest`)         | → repo-rooted: `BRAND.md`, `styles/brand/`, `styles/config/vocabularies/brand/`, `scripts/generate-pack-readme.mjs`     | After spec's `main` advances with changes touching the prose-check stack. Separate sync clock from `sync-spec.sh` because prose tooling and the principles/contract release on different cadences and the tooling has no release ceremony. | None automated on this side. Sync-script atomicity is the integrity guarantee: brand `*.yml` AND its `README.md` come from the same `main` HEAD SHA, so no downstream regeneration / drift surface. `git diff` after sync is the review surface. Idempotent at a fixed `main` HEAD SHA: re-running produces no diff until upstream `main` moves. **Consumer-owned (un-vendored 2026-05-13):** `scripts/prose-check.sh` is no longer vendored by this script because the upstream copy kept clobbering the SITE-LOCAL DIVERGENCE block (consumer-specific path exclusions and LT denylist additions). Universal pipeline changes (new check stage, LT URL change, severity routing) now require coordinated PRs across all four channel repos (spec / site / cli / skill). Long-term fix is the sidecar-config migration tracked at `agentnative-spec/.context/compound-engineering/todos/`; once shipped, vendoring can resume with universal logic vendored and consumer config in a sidecar file. See `scripts/prose-check.sh`'s CONSUMER-OWNED header for context. |
+| `docker/score/` image: pre-installs the full ANC 100 toolset (`anc` + 96 scored binaries) inside a reproducible Ubuntu container; iterates `registry.yaml` and runs `anc check --command <bin> [--audit-profile <category>] --output json` for each | `bash docker/score/build.sh --run` (default: brew-installs the latest `anc` from `brettdavies/tap/agentnative`; with `--from-source <cli-repo>` cargo-builds anc on the host and injects the binary into the image instead, bypassing brew) | → `scorecards/<name>-v<version>.json` (96 files) + `docker/score/out/score-failures.txt` for any install/score failures | After a new `anc` release, after registry changes, or to refresh the full leaderboard. Inject mode is also the way to score against an unreleased anc (feature branch in agentnative-cli before tag + bottle).                             | Build-time schema 0.5 invariant validation in `src/build/scorecards.mjs`; auto-discovery picks the highest-versioned scorecard per slug, silently superseding stale ones. Filename's `-v<version>` suffix is the version anchor (registry no longer carries `version:` per entry post-U4). The container is the source of truth; host-side ad-hoc scoring is deprecated.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              |
 
 ### How spec version flows into rendering
 
 ### How spec versions flow into rendering surfaces
 
-The site shows version labels in three places. **Each pulls from a different source by design** — the three sources move
-at different cadences (vendoring, scoring, manual reconciliation), and conflating them into one would lie about at least
-one of those movements.
+The site shows version labels in three places. **Each pulls from a different source by design** because the three
+sources move at different cadences (vendoring, scoring, manual reconciliation), and conflating them into one would lie
+about at least one of those movements.
 
 ```mermaid
 flowchart LR
@@ -80,20 +80,20 @@ flowchart LR
     util -. "SPEC_VERSION (reference only)" .-> diff
 ```
 
-| Surface         | Source                                             | Bumped by                                                                                                 |
-| --------------- | -------------------------------------------------- | --------------------------------------------------------------------------------------------------------- |
-| Footer          | `SITE_SPEC_VERSION` ← `content/principles/VERSION` | Manual, by the contributor who reconciles `content/principles/p*-*.md` after a `sync-spec.sh` run.        |
-| Per-tool badges | Each scorecard's `spec_version` field              | Automatic — bumps when the scorecard is regenerated against a newer `anc` build (via `docker/score/`).    |
-| OG card         | `anc`'s self-scorecard's `spec_version`            | Automatic on `bun run og` after `anc`'s scorecard is refreshed.                                           |
-| (no surface)    | `SPEC_VERSION` ← `src/data/spec/VERSION`           | Automatic — `./scripts/sync-spec.sh` overwrites whenever the spec ships a new tag. Reference / diff only. |
+| Surface         | Source                                             | Bumped by                                                                                                |
+| --------------- | -------------------------------------------------- | -------------------------------------------------------------------------------------------------------- |
+| Footer          | `SITE_SPEC_VERSION` ← `content/principles/VERSION` | Manual, by the contributor who reconciles `content/principles/p*-*.md` after a `sync-spec.sh` run.       |
+| Per-tool badges | Each scorecard's `spec_version` field              | Automatic; bumps when the scorecard is regenerated against a newer `anc` build (via `docker/score/`).    |
+| OG card         | `anc`'s self-scorecard's `spec_version`            | Automatic on `bun run og` after `anc`'s scorecard is refreshed.                                          |
+| (no surface)    | `SPEC_VERSION` ← `src/data/spec/VERSION`           | Automatic; `./scripts/sync-spec.sh` overwrites whenever the spec ships a new tag. Reference / diff only. |
 
 Why three sources, not one: vendoring (we got a snapshot), scoring (anc was compiled against this spec), and site
 reconciliation (the prose has been updated to match) are three independent events. Conflating them into one constant
 forces at least one surface to lie about its actual currency. Full rationale in `src/data/spec/README.md` and the
 cross-repo version-model doc at `docs/solutions/best-practices/agentnative-version-model-2026-05-01.md`. There is no
-site-own version (`package.json` is `"0.0.0"` deliberately — the spec version IS the site's "version" by intent).
+site-own version (`package.json` is `"0.0.0"` deliberately: the spec version IS the site's "version" by intent).
 
-## Downstream — data flowing OUT of this repo
+## Downstream: data flowing OUT of this repo
 
 ### Build-time vendoring by other repos
 
@@ -103,9 +103,9 @@ site-own version (`package.json` is `"0.0.0"` deliberately — the spec version
 
 ### Deploy-time emission to Cloudflare Workers
 
-| Surface                        | Mechanism                                                   | What's emitted                                                                                                                                                                                                               | Trigger / cadence                                                                                                                                               | Drift check                                                                                                                                             |
-| ------------------------------ | ----------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `anc.dev` (Cloudflare Workers) | `wrangler deploy` invoked by `.github/workflows/deploy.yml` | `dist/` — HTML pages, CSS, JS, 107 per-tool scorecard HTML pages + markdown twins, 96 badge SVGs, OG image, fonts, `skill.{json,html,md}`, `install.{html,md}` (no `install.json` — see DESIGN §3.10), llms.txt, sitemap.xml | Push to `dev` (staging Worker `agentnative-site-staging`) or `main` (production `anc.dev`); `paths-ignore: docs/**, *.md` skips deploy on planning-only commits | None automated — production canary is by hand. The pre-deploy CI pipeline (`ci.yml`) gates on `bun install → lint → build → test → wrangler --dry-run`. |
+| Surface                        | Mechanism                                                   | What's emitted                                                                                                                                                                                                             | Trigger / cadence                                                                                                                                               | Drift check                                                                                                                                            |
+| ------------------------------ | ----------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `anc.dev` (Cloudflare Workers) | `wrangler deploy` invoked by `.github/workflows/deploy.yml` | `dist/`: HTML pages, CSS, JS, 107 per-tool scorecard HTML pages + markdown twins, 96 badge SVGs, OG image, fonts, `skill.{json,html,md}`, `install.{html,md}` (no `install.json`; see DESIGN §3.10), llms.txt, sitemap.xml | Push to `dev` (staging Worker `agentnative-site-staging`) or `main` (production `anc.dev`); `paths-ignore: docs/**, *.md` skips deploy on planning-only commits | None automated; production canary is by hand. The pre-deploy CI pipeline (`ci.yml`) gates on `bun install → lint → build → test → wrangler --dry-run`. |
 
 ## Release / sync orchestration
 
@@ -116,8 +116,8 @@ The flows interact, but each is independently triggered:
    this repo trusts the bytes.
 
 2. **A scored tool ships a new version** (or `anc` itself does) → maintainer runs `bash docker/score/build.sh --run`
-   from the repo root → `docker/score/build.sh` rebuilds the `anc` binary from the local `agentnative-cli` checkout,
-   bakes it into the image, and runs `score-anc100.sh` against the full registry inside the container; bind-mounts write
+   from the repo root → `docker/score/build.sh` brew-installs the latest `anc` from `brettdavies/tap/agentnative` inside
+   the image, bakes it in, and runs `score-anc100.sh` against the full registry inside the container; bind-mounts write
    the new `scorecards/<tool>-v<new>.json` files back to the host. Old per-tool files are silently superseded by
    auto-discovery → next build refreshes the badge SVG and `/score/<tool>` page. The container is the source of truth
    for scoring; host-side ad-hoc scoring (the prior `regen-scorecards.sh` flow) is deprecated.
@@ -126,13 +126,13 @@ The flows interact, but each is independently triggered:
    tag from the spec remote) → vendored `src/data/spec/{VERSION,CHANGELOG.md,principles/p*-*.md}` updates → next site
    build picks up the new `SPEC_VERSION` automatically (footer, OG card, badge URLs all flow from the vendored `VERSION`
    file). Site contributor reviews `git diff src/data/spec/principles/` and decides whether to manually reconcile any
-   prose changes into `content/principles/p*-*.md` (the two file shapes are intentionally different — see
+   prose changes into `content/principles/p*-*.md` (the two file shapes are intentionally different; see
    `src/data/spec/README.md` for the workflow). Spec's `repository_dispatch:spec-release` event already fires here on
    tag publish; a consumer-side handler that auto-PRs the resync is tracked as follow-up work.
 
 4. **Spec's `main` advances with prose-tooling changes** → maintainer runs `bash scripts/sync-prose-tooling.sh` (same
    remote-first / local-fallback resolution as `sync-spec.sh`, but tracks `main` HEAD instead of the latest v* tag
-   because prose tooling is not contract — it's tooling, faster cadence, no release ceremony) → vendored `BRAND.md`,
+   because prose tooling is not contract: it's tooling, faster cadence, no release ceremony) → vendored `BRAND.md`,
    `styles/brand/`, `styles/config/vocabularies/brand/`, and `scripts/generate-pack-readme.mjs` update in place.
    `scripts/prose-check.sh` is consumer-owned (un-vendored 2026-05-13) and is no longer touched by this sync; universal
    pipeline changes there require coordinated PRs across all four channel repos. Separate sync clock from item 3 because
@@ -144,7 +144,7 @@ The flows interact, but each is independently triggered:
    manifest fields changed (per-host install commands, version, description), edits this repo's `src/data/skill.json` to
    bump `version` plus the changed fields → PR to `dev` → release flow to `main` → `wrangler deploy` updates
    `/skill.json` on `anc.dev` → Cloudflare cache purge → CLI's next PR exercises `skill-fixture-drift` against the new
-   fixture. If the release didn't change any manifest fields, skip the manifest bump entirely — installed users learn
+   fixture. If the release didn't change any manifest fields, skip the manifest bump entirely; installed users learn
    about the new release via the skill bundle's `bin/check-update`, not via a manifest change here. Full runbook in
    `RELEASES.md` §"Skill-release procedure".
 
@@ -153,28 +153,28 @@ The flows interact, but each is independently triggered:
 
 ## Reference
 
-- `scripts/sync-coverage-matrix.sh` — header comment for usage and `ANC_ROOT` env var.
-- `scripts/sync-spec.sh` — header comment for usage, `SPEC_REMOTE_URL` / `SPEC_ROOT` env vars, and the
+- `scripts/sync-coverage-matrix.sh`: header comment for usage and `ANC_ROOT` env var.
+- `scripts/sync-spec.sh`: header comment for usage, `SPEC_REMOTE_URL` / `SPEC_ROOT` env vars, and the
   remote-first-with-local-fallback resolution flow.
-- `scripts/sync-prose-tooling.sh` — header comment for the prose-check vendor manifest and rationale (separate sync
-  clock from `sync-spec.sh`; tracks `main` HEAD instead of v* tags because tooling is not contract; brand README is a
-  released artifact, not regenerated downstream). Shares `SPEC_REMOTE_URL` / `SPEC_ROOT` env vars with `sync-spec.sh`.
-  Note: `scripts/prose-check.sh` is consumer-owned (un-vendored 2026-05-13) and intentionally NOT in the manifest; see
-  that file's CONSUMER-OWNED header for context.
-- `docker/score/README.md` + `docker/score/build.sh` — the canonical scoring pipeline. `build.sh --run` builds the image
+- `scripts/sync-prose-tooling.sh`: header comment for the prose-check vendor manifest and rationale (separate sync clock
+  from `sync-spec.sh`; tracks `main` HEAD instead of v* tags because tooling is not contract; brand README is a released
+  artifact, not regenerated downstream). Shares `SPEC_REMOTE_URL` / `SPEC_ROOT` env vars with `sync-spec.sh`. Note:
+  `scripts/prose-check.sh` is consumer-owned (un-vendored 2026-05-13) and intentionally NOT in the manifest; see that
+  file's CONSUMER-OWNED header for context.
+- `docker/score/README.md` + `docker/score/build.sh`: the canonical scoring pipeline. `build.sh --run` builds the image
   and runs `score-anc100.sh` inside the container, writing scorecards back to the host via bind mount. The container is
   the single source of truth for scoring; host-side `regen-scorecards.sh` is deprecated.
-- `src/data/spec/README.md` — what's vendored, why, and the manual reconciliation workflow when spec prose drifts.
-- `RELEASES.md` §"Skill releases" — the downstream manifest-bump procedure for `src/data/skill.json` end-to-end
-  (manifest edit → cache-purge → live verify).
+- `src/data/spec/README.md`: what's vendored, why, and the manual reconciliation workflow when spec prose drifts.
+- `RELEASES.md` §"Skill releases": the downstream manifest-bump procedure for `src/data/skill.json` end-to-end (manifest
+  edit → cache-purge → live verify).
 - `docs/DESIGN.md` §3.9 (`/skill` + `/skill.json` build contract) and §3.10 (`/install` HTML-only contract).
-- `AGENTS.md` — repo conventions and the `content/principles/` vs `src/data/spec/principles/` separation rule.
-- `docs/plans/2026-04-23-001-feat-sync-spec-plan.md` (dev branch only, gated off main) — the plan that introduced
+- `AGENTS.md`: repo conventions and the `content/principles/` vs `src/data/spec/principles/` separation rule.
+- `docs/plans/2026-04-23-001-feat-sync-spec-plan.md` (dev branch only, gated off main): the plan that introduced
   `sync-spec.sh` + vendored `src/data/spec/` + the SPEC_VERSION wiring.
-- `docs/solutions/best-practices/agentnative-version-model-2026-05-01.md` — cross-repo version model: what version means
+- `docs/solutions/best-practices/agentnative-version-model-2026-05-01.md`: cross-repo version model. What version means
   in each of the four agentnative repos, why the site has no own version, where each version is read or displayed.
-- `docs/solutions/best-practices/cross-repo-artifact-consumption-static-sites-2026-04-21.md` — governing pattern
+- `docs/solutions/best-practices/cross-repo-artifact-consumption-static-sites-2026-04-21.md`: governing pattern
   (commit-a-copy over build-time fetch over symlinks).
 - CLI's reference implementation of `sync-spec.sh`: `~/dev/agentnative-cli/scripts/sync-spec.sh`.
-- CLI's `scripts/sync-skill-fixture.sh` and `skill-fixture-drift` workflow — the inverse-direction drift gate that
+- CLI's `scripts/sync-skill-fixture.sh` and `skill-fixture-drift` workflow: the inverse-direction drift gate that
   protects the `src/data/skill.json` → CLI fixture flow.
diff --git a/scripts/cf-access-bootstrap.sh b/scripts/cf-access-bootstrap.sh
new file mode 100755
index 0000000..b43f4e6
--- /dev/null
+++ b/scripts/cf-access-bootstrap.sh
@@ -0,0 +1,288 @@
+#!/usr/bin/env bash
+# cf-access-bootstrap.sh — idempotent Cloudflare Access setup for the staging Worker.
+#
+# What this script does, each step skipped if already present:
+#
+#   1. Creates the Self-Hosted Access application for the staging Worker URL.
+#   2. Creates a CLI service token, capturing its client_id + client_secret
+#      into 1Password (the secret is shown ONCE by Cloudflare).
+#   3. Creates two policies on the app:
+#        a. "Allow brett email" — decision allow, includes a specific email.
+#        b. "Allow CLI service token" — decision non_identity, includes the
+#           service token id from step 2.
+#   4. Verifies the boundary works: unauth request to the protected URL must
+#      return a 302 redirect to *.cloudflareaccess.com; authed request with
+#      the service token headers must return 200.
+#
+# Resources are matched by NAME (not ID), so the script is safe to re-run.
+# If everything is already in place, every step reports "exists, skipping".
+#
+# Disaster recovery: if the CF account is restored from backup or the
+# Access app is deleted, re-running this script reconstructs the staging
+# auth surface from 1Password-resident credentials. The 1Password item
+# `Cloudflare API Token - Access Setup (agentnative-site)` is the only
+# operator-side prerequisite.
+#
+# Inputs (env vars; defaults below):
+#
+#   CF_ACCOUNT_ID         Cloudflare account ID. REQUIRED.
+#   APP_NAME              Access app name (default: "agentnative-site staging")
+#   APP_DOMAIN            Protected URL (default: agentnative-site-staging.brettdavies.workers.dev)
+#   APP_SESSION           session_duration (default: 2160h, 90 days)
+#   IDENTITY_EMAIL        Email allowed by the identity policy (default: davies.brett@gmail.com)
+#   SERVICE_TOKEN_NAME    Service token name (default: agentnative-site-staging-cli)
+#   SERVICE_TOKEN_DURATION CF duration string (default: 8760h, 1 year — the CF max non-forever)
+#   OP_ITEM_API_TOKEN     1Password title for the setup API token
+#                         (default: "Cloudflare API Token - Access Setup (agentnative-site)")
+#   OP_ITEM_SERVICE_TOKEN 1Password title for the service token credentials
+#                         (default: "Cloudflare Access Service Token - agentnative-site-staging")
+#
+# Dependencies: curl, jaq (preferred) or jq, op CLI via the
+# ~/.claude/skills/1password/scripts/ helpers.
+
+set -u
+
+# ---------------------------------------------------------------------------
+# Inputs
+# ---------------------------------------------------------------------------
+
+CF_ACCOUNT_ID="${CF_ACCOUNT_ID:-}"
+APP_NAME="${APP_NAME:-agentnative-site staging}"
+APP_DOMAIN="${APP_DOMAIN:-agentnative-site-staging.brettdavies.workers.dev}"
+APP_SESSION="${APP_SESSION:-2160h}"
+IDENTITY_EMAIL="${IDENTITY_EMAIL:-davies.brett@gmail.com}"
+SERVICE_TOKEN_NAME="${SERVICE_TOKEN_NAME:-agentnative-site-staging-cli}"
+SERVICE_TOKEN_DURATION="${SERVICE_TOKEN_DURATION:-8760h}"
+OP_ITEM_API_TOKEN="${OP_ITEM_API_TOKEN:-Cloudflare API Token - Access Setup (agentnative-site)}"
+OP_ITEM_SERVICE_TOKEN="${OP_ITEM_SERVICE_TOKEN:-Cloudflare Access Service Token - agentnative-site-staging}"
+
+OP_READ="${OP_READ:-$HOME/.claude/skills/1password/scripts/read_field.sh}"
+OP_CREATE="${OP_CREATE:-$HOME/.claude/skills/1password/scripts/create_item.sh}"
+
+JQ_BIN="$(command -v jaq || command -v jq || true)"
+
+# ---------------------------------------------------------------------------
+# Sanity checks
+# ---------------------------------------------------------------------------
+
+die() {
+  printf 'FATAL: %s\n' "$1" >&2
+  exit 2
+}
+
+[ -n "$CF_ACCOUNT_ID" ] || die "CF_ACCOUNT_ID env var is required (32-char hex)."
+[ -n "$JQ_BIN" ] || die "neither jaq nor jq installed; install one (brew install jaq) and retry."
+[ -x "$OP_READ" ] || die "1Password read helper not found at $OP_READ; install the 1password skill or export OP_READ."
+[ -x "$OP_CREATE" ] || die "1Password create helper not found at $OP_CREATE."
+
+API_TOKEN="$("$OP_READ" "$OP_ITEM_API_TOKEN" credential 2>/dev/null || true)"
+[ -n "$API_TOKEN" ] || die "could not read API token from 1Password item '$OP_ITEM_API_TOKEN'. Verify the item exists with a field named 'credential'."
+
+API_BASE="https://api.cloudflare.com/client/v4/accounts/$CF_ACCOUNT_ID"
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+# cf_get PATH
+cf_get() {
+  curl -s -H "Authorization: Bearer $API_TOKEN" "$API_BASE$1"
+}
+
+# cf_post PATH BODY
+cf_post() {
+  curl -s -X POST -H "Authorization: Bearer $API_TOKEN" -H "Content-Type: application/json" \
+    "$API_BASE$1" --data "$2"
+}
+
+# Report a one-liner table row.
+row() {
+  printf '  %-30s %s\n' "$1" "$2"
+}
+
+# ---------------------------------------------------------------------------
+# Token sanity probe
+# ---------------------------------------------------------------------------
+
+printf '\n=== cf-access-bootstrap @ %s ===\n' "$APP_DOMAIN"
+printf '    account_id=%s\n' "$CF_ACCOUNT_ID"
+printf '    app_name=%s\n' "$APP_NAME"
+printf '    session_duration=%s\n\n' "$APP_SESSION"
+
+probe="$(cf_get "/access/apps")"
+probe_success="$("$JQ_BIN" -r '.success' <<<"$probe")"
+if [ "$probe_success" != "true" ]; then
+  die "API token sanity check failed: $(echo "$probe" | "$JQ_BIN" -c '.errors')
+       Verify the token has 'Access: Apps and Policies Write' AND 'Access: Service Tokens Write' permissions."
+fi
+
+# ---------------------------------------------------------------------------
+# Step 1: Access application
+# ---------------------------------------------------------------------------
+
+printf '[1] Access application\n'
+APP_ID="$(echo "$probe" | "$JQ_BIN" -r --arg name "$APP_NAME" '.result[] | select(.name == $name) | .id' | head -1)"
+
+if [ -n "$APP_ID" ] && [ "$APP_ID" != "null" ]; then
+  row "status" "exists, skipping creation"
+  row "app_id" "$APP_ID"
+  AUD="$(echo "$probe" | "$JQ_BIN" -r --arg name "$APP_NAME" '.result[] | select(.name == $name) | .aud' | head -1)"
+  CURRENT_SESSION="$(echo "$probe" | "$JQ_BIN" -r --arg name "$APP_NAME" '.result[] | select(.name == $name) | .session_duration' | head -1)"
+  row "aud" "$AUD"
+  row "session_duration" "$CURRENT_SESSION"
+  if [ "$CURRENT_SESSION" != "$APP_SESSION" ]; then
+    row "session_duration drift" "current=$CURRENT_SESSION  desired=$APP_SESSION (re-run with manual PUT if you want this updated)"
+  fi
+else
+  printf '  creating ...\n'
+  create_body=$(cat <<EOF
+{
+  "name": "$APP_NAME",
+  "type": "self_hosted",
+  "domain": "$APP_DOMAIN",
+  "session_duration": "$APP_SESSION",
+  "auto_redirect_to_identity": false,
+  "service_auth_401_redirect": false,
+  "app_launcher_visible": false
+}
+EOF
+)
+  create_resp="$(cf_post "/access/apps" "$create_body")"
+  create_success="$(echo "$create_resp" | "$JQ_BIN" -r '.success')"
+  [ "$create_success" = "true" ] || die "app create failed: $(echo "$create_resp" | "$JQ_BIN" -c '.errors')"
+  APP_ID="$(echo "$create_resp" | "$JQ_BIN" -r '.result.id')"
+  AUD="$(echo "$create_resp" | "$JQ_BIN" -r '.result.aud')"
+  row "status" "CREATED"
+  row "app_id" "$APP_ID"
+  row "aud" "$AUD"
+fi
+
+# ---------------------------------------------------------------------------
+# Step 2: Service token
+# ---------------------------------------------------------------------------
+
+printf '\n[2] Service token\n'
+tokens="$(cf_get "/access/service_tokens")"
+SVC_TOKEN_ID="$(echo "$tokens" | "$JQ_BIN" -r --arg name "$SERVICE_TOKEN_NAME" '.result[] | select(.name == $name) | .id' | head -1)"
+
+if [ -n "$SVC_TOKEN_ID" ] && [ "$SVC_TOKEN_ID" != "null" ]; then
+  row "status" "exists, skipping creation"
+  row "token_id" "$SVC_TOKEN_ID"
+  # Sanity-check 1Password item is present so the smoke script will find it.
+  if ! "$OP_READ" "$OP_ITEM_SERVICE_TOKEN" client_id >/dev/null 2>&1; then
+    printf '  WARNING: service token "%s" exists in CF but 1Password item "%s" is missing.\n' "$SERVICE_TOKEN_NAME" "$OP_ITEM_SERVICE_TOKEN" >&2
+    printf '           The CLI client_secret cannot be recovered. Rotate via:\n' >&2
+    printf '             curl -s -X POST -H "Authorization: Bearer \$API_TOKEN" \\\n' >&2
+    printf '               "%s/access/service_tokens/%s/rotate"\n' "$API_BASE" "$SVC_TOKEN_ID" >&2
+    printf '           Then capture the new client_secret into 1Password.\n' >&2
+  else
+    row "1password" "item '$OP_ITEM_SERVICE_TOKEN' present (client_id readable)"
+  fi
+else
+  printf '  creating ...\n'
+  resp_dir="$(mktemp -d -t cf-svc-XXXXXXXX)"
+  chmod 700 "$resp_dir"
+  create_resp="$(cf_post "/access/service_tokens" "{\"name\": \"$SERVICE_TOKEN_NAME\", \"duration\": \"$SERVICE_TOKEN_DURATION\"}")"
+  echo "$create_resp" > "$resp_dir/resp.json"
+  chmod 600 "$resp_dir/resp.json"
+  create_success="$("$JQ_BIN" -r '.success' "$resp_dir/resp.json")"
+  if [ "$create_success" != "true" ]; then
+    err="$("$JQ_BIN" -c '.errors' "$resp_dir/resp.json")"
+    shred -uz "$resp_dir/resp.json" && rmdir "$resp_dir"
+    die "service token create failed: $err"
+  fi
+  SVC_TOKEN_ID="$("$JQ_BIN" -r '.result.id' "$resp_dir/resp.json")"
+  expires_at="$("$JQ_BIN" -r '.result.expires_at' "$resp_dir/resp.json")"
+  expires_ts="$(date -u -d "$expires_at" +%s)"
+
+  printf '  ingesting to 1Password (value never echoed) ...\n'
+  notes="CF Access service token for the $APP_NAME Worker at $APP_DOMAIN. Auth via HTTP headers CF-Access-Client-Id and CF-Access-Client-Secret. Created $(date -u +%Y-%m-%d) by scripts/cf-access-bootstrap.sh; expires $expires_at. Rotate via the CF dashboard or POST to /access/service_tokens/$SVC_TOKEN_ID/rotate."
+  "$OP_CREATE" \
+    --title "$OP_ITEM_SERVICE_TOKEN" \
+    --tags "cloudflare,access,service-token,agentnative-site,staging" \
+    --notes "$notes" \
+    --hostname "$APP_DOMAIN" \
+    --field "username=$SERVICE_TOKEN_NAME" \
+    --field "expires=$expires_ts" \
+    --field "type=Service Token" \
+    --field "client_id=$("$JQ_BIN" -r '.result.client_id' "$resp_dir/resp.json")" \
+    --field "client_secret[concealed]=$("$JQ_BIN" -r '.result.client_secret' "$resp_dir/resp.json")" >/dev/null
+
+  shred -uz "$resp_dir/resp.json" && rmdir "$resp_dir"
+  row "status" "CREATED + ingested"
+  row "token_id" "$SVC_TOKEN_ID"
+  row "1password" "item '$OP_ITEM_SERVICE_TOKEN' created"
+fi
+
+# ---------------------------------------------------------------------------
+# Step 3: Policies
+# ---------------------------------------------------------------------------
+
+printf '\n[3] Policies\n'
+existing_policies="$(cf_get "/access/apps/$APP_ID/policies")"
+
+ensure_policy() {
+  local pname="$1" body="$2"
+  local existing_id
+  existing_id="$(echo "$existing_policies" | "$JQ_BIN" -r --arg name "$pname" '.result[] | select(.name == $name) | .id' | head -1)"
+  if [ -n "$existing_id" ] && [ "$existing_id" != "null" ]; then
+    row "$pname" "exists ($existing_id)"
+    return
+  fi
+  local resp
+  resp="$(cf_post "/access/apps/$APP_ID/policies" "$body")"
+  local ok
+  ok="$(echo "$resp" | "$JQ_BIN" -r '.success')"
+  if [ "$ok" != "true" ]; then
+    printf '  FAILED: %s\n' "$pname" >&2
+    echo "$resp" | "$JQ_BIN" -c '.errors' >&2
+    die "policy create failed (most common cause: API token missing 'Access: Apps and Policies Write' permission group)"
+  fi
+  row "$pname" "CREATED ($(echo "$resp" | "$JQ_BIN" -r '.result.id'))"
+}
+
+email_policy_body=$(cat <<EOF
+{
+  "name": "Allow brett email",
+  "decision": "allow",
+  "include": [{"email": {"email": "$IDENTITY_EMAIL"}}]
+}
+EOF
+)
+service_policy_body=$(cat <<EOF
+{
+  "name": "Allow CLI service token",
+  "decision": "non_identity",
+  "include": [{"service_token": {"token_id": "$SVC_TOKEN_ID"}}]
+}
+EOF
+)
+ensure_policy "Allow brett email" "$email_policy_body"
+ensure_policy "Allow CLI service token" "$service_policy_body"
+
+# ---------------------------------------------------------------------------
+# Step 4: Verify
+# ---------------------------------------------------------------------------
+
+printf '\n[4] Verify\n'
+unauth_status="$(curl -s -o /dev/null -w '%{http_code}' "https://$APP_DOMAIN/api/score?input=ripgrep")"
+unauth_location="$(curl -s -o /dev/null -w '%{redirect_url}' "https://$APP_DOMAIN/api/score?input=ripgrep")"
+if [ "$unauth_status" = "302" ] && echo "$unauth_location" | grep -q 'cloudflareaccess.com'; then
+  row "unauth probe" "302 → cloudflareaccess.com (boundary enforced)"
+else
+  row "unauth probe" "UNEXPECTED status=$unauth_status (expected 302 to *.cloudflareaccess.com)"
+fi
+
+CLIENT_ID="$("$OP_READ" "$OP_ITEM_SERVICE_TOKEN" client_id)"
+CLIENT_SECRET="$("$OP_READ" "$OP_ITEM_SERVICE_TOKEN" client_secret)"
+authed_status="$(curl -s -o /dev/null -w '%{http_code}' \
+  -H "CF-Access-Client-Id: $CLIENT_ID" \
+  -H "CF-Access-Client-Secret: $CLIENT_SECRET" \
+  "https://$APP_DOMAIN/api/score?input=ripgrep")"
+if [ "$authed_status" = "200" ]; then
+  row "service-token probe" "200 (service token allowed by policy)"
+else
+  row "service-token probe" "UNEXPECTED status=$authed_status (expected 200)"
+fi
+
+printf '\n=== done ===\n'
diff --git a/scripts/generate-pack-readme.mjs b/scripts/generate-pack-readme.mjs
index 1fe3d51..5e9edae 100644
--- a/scripts/generate-pack-readme.mjs
+++ b/scripts/generate-pack-readme.mjs
@@ -10,7 +10,7 @@
 // its source pack.
 //
 // Usage:
-//   bun scripts/generate-pack-readme.mjs            write mode (defaults: Brand, Spec)
+//   bun scripts/generate-pack-readme.mjs            write mode (defaults: Brand, Site)
 //   bun scripts/generate-pack-readme.mjs --check    drift check; exits 1 on diff
 //   bun scripts/generate-pack-readme.mjs <pack>...  target specific pack(s)
 //
@@ -23,9 +23,9 @@ import yaml from "js-yaml";
 
 const REPO_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
 const STYLES_DIR = path.join(REPO_ROOT, "styles");
-const DEFAULT_PACKS = ["brand", "spec"];
+const DEFAULT_PACKS = ["brand", "site"];
 const TRAILER = (pack) =>
-  `<!-- generated by scripts/generate-pack-readme.mjs from styles/${pack}/*.yml — do not edit by hand -->`;
+  `<!-- generated by scripts/generate-pack-readme.mjs from styles/${pack}/*.yml. Do not edit by hand. -->`;
 
 function parseArgs(argv) {
   const args = argv.slice(2);
diff --git a/scripts/hooks/pre-push b/scripts/hooks/pre-push
index aebd449..fc3ed56 100755
--- a/scripts/hooks/pre-push
+++ b/scripts/hooks/pre-push
@@ -20,7 +20,7 @@
 #                       reads tokens from styles/site/BannedFonts.yml)
 #   7. prose-check    — bash scripts/prose-check.sh
 #                       (Vale + LanguageTool over *.md in scope; LT skips
-#                       cleanly when pool is unreachable)
+#                       cleanly when LanguageTool is unreachable)
 #
 # Stages 5-7 each redirect child stdin to </dev/null. The branch-deletion
 # short-circuit at the top of the hook consumes the hook-protocol bytes
@@ -71,6 +71,9 @@ bold '==> Banned-font deployment scan'
 bash scripts/check-banned-fonts.sh </dev/null
 
 bold '==> prose-check (Vale + LanguageTool)'
-bash scripts/prose-check.sh </dev/null
+# Scope to files changed vs PROSE_CHECK_BASE (default origin/dev) so untracked
+# or pre-existing prose drift outside the pushed branch's diff can't block the
+# push. Full-scope check remains available via `bash scripts/prose-check.sh`.
+bash scripts/prose-check.sh --changed-only </dev/null
 
 bold '==> pre-push checks passed'
diff --git a/scripts/prose-check.sh b/scripts/prose-check.sh
index 9888b17..007ac41 100755
--- a/scripts/prose-check.sh
+++ b/scripts/prose-check.sh
@@ -70,57 +70,34 @@
 #   scripts/prose-check.sh --lt-only       skip Vale entirely (LT debugging)
 #
 # Env:
-#   LANGUAGETOOL_URL    LT base URL (default: http://pool.tail42ba87.ts.net:8081)
-#                       FQDN avoids macOS+Tailscale short-name DNS timeouts.
+#   LANGUAGETOOL_URL    LT base URL (default: http://languagetool:8081).
+#                       Consumed by lt_check (~/dotfiles/config/shell/languagetool.sh).
+#   LT_DENY_RULES       Extend the baseline 10-rule denylist with repo-specific
+#                       rule IDs. This site adds 4 by default (IN_PRINCIPAL,
+#                       CONTRACT_CONTACT, TO_DO_HYPHEN, PLURAL_MODIFIER); override
+#                       to replace, or set to "${LT_DENY_RULES_BASELINE}|EXTRA" to
+#                       extend further.
 #   PROSE_CHECK_BASE    git ref to diff against in --changed-only (default: origin/dev)
 
 set -euo pipefail
 cd "$(git rev-parse --show-toplevel)"
 
-LT_URL_DEFAULT="http://pool.tail42ba87.ts.net:8081"
-LT_URL="${LANGUAGETOOL_URL:-$LT_URL_DEFAULT}"
+# LanguageTool wrapper: see ~/dotfiles/config/shell/languagetool.sh for the
+# baseline 10-rule denylist (LT_DENY_RULES_BASELINE), category whitelist,
+# and exit-code contract. Reachability probe and per-file POST live there.
+LT_LIB="${DOTFILES_SHELL_DIR:-$HOME/dotfiles/config/shell}/languagetool.sh"
+if [[ ! -f "$LT_LIB" ]]; then
+  echo "prose-check: required helper $LT_LIB not found (install brettdavies/dotfiles)" >&2
+  exit 2
+fi
+# shellcheck disable=SC1090
+source "$LT_LIB"
+
 PROSE_CHECK_BASE="${PROSE_CHECK_BASE:-origin/dev}"
-# LT blocking whitelist — narrowed from the plan's 7-category default
-# (TYPOS|GRAMMAR|PUNCTUATION|TYPOGRAPHY|CASING|COMPOUNDING|CONFUSED_WORDS)
-# to the three categories that are reliably high-signal on markdown corpora.
-# PUNCTUATION/TYPOGRAPHY/CASING/COMPOUNDING fired ~95% noise on the spec
-# corpus from LT misreading markdown syntax (table whitespace, `->` arrows,
-# code-fence quotes); they remain on the warning tier (visible via
-# --warnings). Re-promote to blocking when LT gains markdown awareness or
-# a per-rule allowlist lands.
-LT_BLOCKING_CATEGORIES='^(TYPOS|GRAMMAR|CONFUSED_WORDS)$'
 
-# Per-rule denylist within the blocking categories — specific LT rule
-# IDs that misfire on RFC 2119 keyword conventions or on technical-prose
-# patterns the rule pack does not cover. Override via LT_DENY_RULES env.
-#
-#   MD_BASEFORM            "MUST <verb>" / "MAY <verb>" — LT does not
-#                          recognize RFC 2119 keywords; treats them as
-#                          modal-verb usage and demands base form.
-#   MUST_HAVE_TO           Same root cause for "must" usage.
-#   HAVE_PART_AGREEMENT    Misfires on "if: CLI has X" YAML-prose.
-#   PREPOSITION_VERB       Misfires on workflow names ("deploy / publish").
-#   THIS_NNS               Misfires on "all of these hold" technical claims.
-#   NON_STANDARD_WORD      Misfires on identifier strings inside code spans.
-#   POSSESSIVE_APOSTROPHE  Misfires on code-comment-style prose.
-#   A_INSTALL              Misfires on "an install path" / "a full reinstall"
-#                          — CLI-domain noun usage of install/reinstall that
-#                          LT's noun lexicon does not cover.
-#   IS_AND_ARE             Misfires on parenthetical-clause subjects, e.g.
-#                          "runtimes (Claude Code, Cursor, ... and others as
-#                          the ecosystem evolves)" — LT picks the wrong head
-#                          noun when a parenthetical sits between subject and
-#                          verb.
-#   SINGULAR_NOUN_ADV_AGREEMENT
-#                          Same class of misfire on subordinate-clause
-#                          subjects, e.g. "Agents consuming JSON output still
-#                          receive interleaved diagnostic text" — LT parses
-#                          "JSON output" as the head noun and demands a
-#                          singular verb when the actual subject ("Agents")
-#                          is plural.
-#
 # === SITE-LOCAL DENYLIST EXTENSIONS ====================================
-# Four additional rules that misfire on agentnative-site domain jargon:
+# Four rules atop the lt_check baseline that misfire on agentnative-site
+# domain jargon:
 #
 #   IN_PRINCIPAL       LT confuses "principle" (P1-P8 noun, the contract
 #                      term) with "principal" (chief). Site corpus uses
@@ -143,8 +120,8 @@ LT_BLOCKING_CATEGORIES='^(TYPOS|GRAMMAR|CONFUSED_WORDS)$'
 #                      site-corpus-correct fix; the alternative is
 #                      rewording every doc that names a CF CLI command.
 # ========================================================================
-LT_DENY_RULES_DEFAULT='^(MD_BASEFORM|MUST_HAVE_TO|HAVE_PART_AGREEMENT|PREPOSITION_VERB|THIS_NNS|NON_STANDARD_WORD|POSSESSIVE_APOSTROPHE|A_INSTALL|IS_AND_ARE|SINGULAR_NOUN_ADV_AGREEMENT|IN_PRINCIPAL|CONTRACT_CONTACT|TO_DO_HYPHEN|PLURAL_MODIFIER)$'
-LT_DENY_RULES="${LT_DENY_RULES:-$LT_DENY_RULES_DEFAULT}"
+LT_DENY_RULES="${LT_DENY_RULES:-${LT_DENY_RULES_BASELINE}|IN_PRINCIPAL|CONTRACT_CONTACT|TO_DO_HYPHEN|PLURAL_MODIFIER}"
+export LT_DENY_RULES
 
 CHANGED_ONLY=0
 SHOW_WARNINGS=0
@@ -249,47 +226,25 @@ fi
 
 # --- LanguageTool stage ---
 if (( RUN_LT )); then
-  if curl --max-time 2 -fsS "$LT_URL/v2/languages" >/dev/null 2>&1; then
-    LT_TMP="$(mktemp -d)"
-    trap 'rm -rf "$LT_TMP" "$OUT_FILE"' EXIT
-
-    printf '%s\0' "${MD_FILES[@]}" | xargs -0 -P4 -I{} bash -c '
-      file="$1"; tmp="$2"; url="$3"
-      out="$tmp/$(echo "$file" | tr "/" "_").json"
-      curl -sS --max-time 30 -X POST "$url/v2/check" \
-        --data-urlencode "language=en-US" \
-        --data-urlencode "text@$file" > "$out" 2>/dev/null || true
-    ' _ {} "$LT_TMP" "$LT_URL"
-
-    for f in "${MD_FILES[@]}"; do
-      json="$LT_TMP/$(echo "$f" | tr '/' '_').json"
-      [[ -s "$json" ]] || continue
-      while IFS=$'\t' read -r offset rule_id category message; do
-        [[ -z "$offset" ]] && continue
-        # Approximate line from byte offset (no exact column conversion at v1).
-        line=$(awk -v off="$offset" 'BEGIN{cur=0} {cur+=length($0)+1; if (cur>off) {print NR; exit}}' "$f" 2>/dev/null)
-        line="${line:-?}"
-        if [[ "$category" =~ $LT_BLOCKING_CATEGORIES ]] && ! [[ "$rule_id" =~ $LT_DENY_RULES ]]; then
-          BLOCKING=$((BLOCKING + 1))
-          printf '%s:%s:LT.%s (%s): %s\n' "$f" "$line" "$rule_id" "$category" "$message" >> "$OUT_FILE"
-        else
-          WARNING=$((WARNING + 1))
-          if (( SHOW_WARNINGS )); then
-            printf '[warn] %s:%s:LT.%s (%s): %s\n' "$f" "$line" "$rule_id" "$category" "$message" >> "$OUT_FILE"
-          fi
-        fi
-      done < <(jaq -r '.matches[]? | [.offset, .rule.id, .rule.category.id, .message] | @tsv' "$json" 2>/dev/null || true)
-    done
-  else
-    rc=$?
-    case "$rc" in
-      6)  reason="couldn't resolve host (Tailscale likely off, or FQDN drift)" ;;
-      7)  reason="couldn't connect (host up, LT service down)" ;;
-      28) reason="timed out (>2s; service slow or network impaired)" ;;
-      *)  reason="curl exit $rc" ;;
-    esac
-    echo "prose-check: LanguageTool unreachable at $LT_URL — $reason; skipping grammar check" >&2
-  fi
+  LT_OUT="$(mktemp)"
+  trap 'rm -f "$OUT_FILE" "$LT_OUT"' EXIT
+  LT_RC=0
+  lt_check "${MD_FILES[@]}" > "$LT_OUT" || LT_RC=$?
+  case "$LT_RC" in
+    0|1) ;;  # findings (if any) are in LT_OUT
+    2) echo "prose-check: skipping grammar check (see lt_check notice above)" >&2 ;;
+    *) echo "prose-check: lt_check returned unexpected exit $LT_RC" >&2; exit 2 ;;
+  esac
+  while IFS= read -r ln; do
+    [[ -z "$ln" ]] && continue
+    if [[ "$ln" == "[warn] "* ]]; then
+      WARNING=$((WARNING + 1))
+      (( SHOW_WARNINGS )) && printf '%s\n' "$ln" >> "$OUT_FILE"
+    else
+      BLOCKING=$((BLOCKING + 1))
+      printf '%s\n' "$ln" >> "$OUT_FILE"
+    fi
+  done < "$LT_OUT"
 fi
 
 # Print findings sorted by file then line
diff --git a/scripts/score-sandbox.py b/scripts/score-sandbox.py
new file mode 100755
index 0000000..533df2e
--- /dev/null
+++ b/scripts/score-sandbox.py
@@ -0,0 +1,420 @@
+#!/usr/bin/env -S uv run python3
+# /// script
+# requires-python = ">=3.11"
+# dependencies = [
+#   "polars>=1.0",
+# ]
+# ///
+"""Score-algorithm sandbox.
+
+Loads every latest-version scorecard plus the coverage-matrix, joins per-check tier
+metadata onto each result row, and computes several candidate scoring algorithms
+side-by-side as a polars DataFrame. Emits into `.context/score-sandbox/` (gitignored
+local-only artifact dir per the repo's `.context/` convention):
+
+    .context/score-sandbox/long.parquet   long-form dataframe (one row per check per tool)
+    .context/score-sandbox/tools.csv      per-tool aggregate scores (one row per tool)
+    .context/score-sandbox/report.md      markdown report (eligibility, distribution, leaderboard)
+
+Pure read-only against the host repo's tracked data. Does not touch the CLI or scorecards/.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+import sys
+from pathlib import Path
+
+import polars as pl
+
+REPO = Path(__file__).resolve().parent.parent
+SCORECARDS = REPO / "scorecards"
+COVERAGE = REPO / "src/data/coverage-matrix.json"
+OUT_DIR = REPO / ".context/score-sandbox"
+
+VERSION_RE = re.compile(r"^(.+)-v([0-9].*)\.json$")
+
+
+def parse_version(v: str) -> tuple[int, ...]:
+    return tuple(int(x) if x.isdigit() else 0 for x in v.split("."))
+
+
+def load_tier_lookup() -> dict[str, str]:
+    matrix = json.loads(COVERAGE.read_text())
+    lookup: dict[str, str] = {}
+    for row in matrix["rows"]:
+        for v in row.get("verifiers", []):
+            lookup[v["check_id"]] = row["level"]  # must | should | may
+    lookup.setdefault("p3-version", "must")
+    return lookup
+
+
+def load_latest_scorecards() -> list[dict]:
+    """Pick the highest-versioned scorecard per slug."""
+    seen: dict[str, dict] = {}
+    for f in sorted(SCORECARDS.glob("*.json")):
+        m = VERSION_RE.match(f.name)
+        if not m:
+            continue
+        slug, version = m.group(1), m.group(2)
+        prior = seen.get(slug)
+        if prior is None or parse_version(version) > parse_version(prior["version"]):
+            data = json.loads(f.read_text())
+            seen[slug] = {"slug": slug, "version": version, "file": f.name, "data": data}
+    return sorted(seen.values(), key=lambda x: x["slug"])
+
+
+def build_long_frame(cards: list[dict], tiers: dict[str, str]) -> pl.DataFrame:
+    """One row per check per tool: slug, version, check_id, status, layer, tier."""
+    rows = []
+    for card in cards:
+        for r in card["data"]["results"]:
+            rows.append(
+                {
+                    "slug": card["slug"],
+                    "version": card["version"],
+                    "check_id": r["id"],
+                    "status": r["status"],
+                    "layer": r.get("layer", ""),
+                    "tier": tiers.get(r["id"], "must"),
+                }
+            )
+    return pl.DataFrame(rows)
+
+
+# ───── scoring expressions ─────────────────────────────────────────────────
+
+
+def weighted_score(
+    weights: dict[str, float],
+    *,
+    may_warn_as_skip: bool = False,
+    skip_in_denom: bool = False,
+    exec_pass: float = 1.0,
+    exec_warn: float = 0.5,
+    exec_fail: float = 0.0,
+) -> pl.Expr:
+    """Element-value: sum(base * exec) / sum(base over denom rows).
+
+    `skip_in_denom=False` (default): denominator = base over pass/warn/fail only.
+        Rewards tools whose evaluated set is mostly passes — "ratio under tier weights."
+    `skip_in_denom=True`: denominator = base over pass/warn/fail/skip.
+        True skating model: skip earns no points but its base still appears in the
+        ceiling, so a tool that didn't attempt the check pays for that absence.
+    `may_warn_as_skip`: reclassify MAY-tier `warn` to `skip` before applying the
+        skip-handling rule. Lets "MAY non-adoption shouldn't count against you"
+        compose with either denominator stance.
+    """
+    tier_w = (
+        pl.when(pl.col("tier") == "must")
+        .then(weights["must"])
+        .when(pl.col("tier") == "should")
+        .then(weights["should"])
+        .when(pl.col("tier") == "may")
+        .then(weights["may"])
+        .otherwise(1.0)
+    )
+    eff_status = (
+        pl.when((pl.col("tier") == "may") & (pl.col("status") == "warn") & may_warn_as_skip)
+        .then(pl.lit("skip"))
+        .otherwise(pl.col("status"))
+    )
+    exec_mult = (
+        pl.when(eff_status == "pass")
+        .then(exec_pass)
+        .when(eff_status == "warn")
+        .then(exec_warn)
+        .when(eff_status == "fail")
+        .then(exec_fail)
+        .otherwise(0.0)  # skip → contributes 0 to numerator
+    )
+    if skip_in_denom:
+        # Denom rows: every status except `error` (probe broke; anc-side bug).
+        denom_valid = eff_status != "error"
+    else:
+        # Denom rows: only pass/warn/fail.
+        denom_valid = eff_status.is_in(["pass", "warn", "fail"])
+    num = (tier_w * exec_mult).filter(denom_valid).sum()
+    denom = tier_w.filter(denom_valid).sum()
+    return (
+        pl.when(denom > 0)
+        .then((num / denom * 100).round(0))
+        .otherwise(0)
+        .cast(pl.Int64)
+    )
+
+
+def current_score() -> pl.Expr:
+    pass_n = (pl.col("status") == "pass").sum()
+    warn_n = (pl.col("status") == "warn").sum()
+    fail_n = (pl.col("status") == "fail").sum()
+    denom = pass_n + warn_n + fail_n
+    return (
+        pl.when(denom > 0)
+        .then((pass_n / denom * 100).round(0))
+        .otherwise(0)
+        .cast(pl.Int64)
+    )
+
+
+def compliance_score() -> pl.Expr:
+    """MUST + SHOULD only. MAY excluded from headline."""
+    mask = pl.col("tier").is_in(["must", "should"])
+    pass_n = ((pl.col("status") == "pass") & mask).sum()
+    eval_n = (pl.col("status").is_in(["pass", "warn", "fail"]) & mask).sum()
+    return (
+        pl.when(eval_n > 0)
+        .then((pass_n / eval_n * 100).round(0))
+        .otherwise(0)
+        .cast(pl.Int64)
+    )
+
+
+def extras_score() -> pl.Expr:
+    """MAY adoption rate: pass / (pass + warn + fail + skip) over MAY-tier checks.
+
+    Skip counts in the denominator so the metric reflects what fraction of the
+    spec's MAY menu the tool adopts — including 'tool didn't ship this thing.'
+    """
+    mask = pl.col("tier") == "may"
+    pass_n = ((pl.col("status") == "pass") & mask).sum()
+    total_n = mask.sum()
+    return (
+        pl.when(total_n > 0)
+        .then((pass_n / total_n * 100).round(0))
+        .otherwise(0)
+        .cast(pl.Int64)
+    )
+
+
+def weighted_blend(comp_weight: float = 0.85) -> pl.Expr:
+    return (
+        (compliance_score() * comp_weight + extras_score() * (1 - comp_weight))
+        .round(0)
+        .cast(pl.Int64)
+    )
+
+
+# ───── aggregation per tool ───────────────────────────────────────────────
+
+
+def compute_tool_scores(long: pl.DataFrame) -> pl.DataFrame:
+    tier_mix = (
+        long.group_by("slug")
+        .agg(
+            (pl.col("tier") == "must").sum().alias("n_must"),
+            (pl.col("tier") == "should").sum().alias("n_should"),
+            (pl.col("tier") == "may").sum().alias("n_may"),
+            pl.col("version").first(),
+        )
+    )
+
+    scored = long.group_by("slug").agg(
+        current_score().alias("A_current"),
+        weighted_score({"must": 1, "should": 2, "may": 3}).alias("B_skating_1_2_3"),
+        weighted_score({"must": 1, "should": 2, "may": 4}).alias("C_skating_1_2_4"),
+        compliance_score().alias("D_compliance"),
+        extras_score().alias("D_extras"),
+        weighted_score({"must": 1, "should": 2, "may": 3}, may_warn_as_skip=True).alias(
+            "E_skating_may_skip"
+        ),
+        weighted_blend(0.85).alias("F_weighted_85_15"),
+        weighted_score({"must": 1, "should": 2, "may": 3}, skip_in_denom=True).alias(
+            "G_ceiling_1_2_3"
+        ),
+        weighted_score(
+            {"must": 1, "should": 2, "may": 3},
+            skip_in_denom=True,
+            may_warn_as_skip=True,
+        ).alias("H_ceiling_may_skip"),
+    )
+
+    return tier_mix.join(scored, on="slug").sort("B_skating_1_2_3", descending=True)
+
+
+# ───── reporting ──────────────────────────────────────────────────────────
+
+
+def threshold_eligibility(df: pl.DataFrame, threshold: int) -> dict[str, int]:
+    cols = [
+        "A_current", "B_skating_1_2_3", "C_skating_1_2_4", "D_compliance",
+        "E_skating_may_skip", "F_weighted_85_15", "G_ceiling_1_2_3", "H_ceiling_may_skip",
+    ]
+    out = {c: int(df.filter(pl.col(c) >= threshold).height) for c in cols}
+    out["D_both"] = int(
+        df.filter((pl.col("D_compliance") >= threshold) & (pl.col("D_extras") >= 50)).height
+    )
+    return out
+
+
+def bucket_distribution(df: pl.DataFrame, col: str) -> list[int]:
+    buckets = [(90, 100), (80, 89), (70, 79), (60, 69), (50, 59), (0, 49)]
+    return [
+        int(df.filter((pl.col(col) >= lo) & (pl.col(col) <= hi)).height)
+        for (lo, hi) in buckets
+    ]
+
+
+def add_ranks(df: pl.DataFrame) -> pl.DataFrame:
+    return df.with_columns(
+        pl.col("A_current").rank("min", descending=True).cast(pl.Int64).alias("A_rank"),
+        pl.col("B_skating_1_2_3").rank("min", descending=True).cast(pl.Int64).alias("B_rank"),
+        pl.col("F_weighted_85_15").rank("min", descending=True).cast(pl.Int64).alias("F_rank"),
+    ).with_columns(
+        (pl.col("A_rank") - pl.col("B_rank")).alias("rank_delta_A_to_B"),
+    )
+
+
+def render_markdown(df: pl.DataFrame) -> str:
+    lines: list[str] = []
+    push = lines.append
+    push("# Scoring sandbox — v0.4.0 rescore data")
+    push("")
+    push(f"Tools analyzed: {df.height}. Generated by `scripts/score-sandbox.py`.")
+    push("")
+    push("## Configurations")
+    push("")
+    push("- **A current** — `pass / (pass + warn + fail)`, skip/error excluded. Today's algorithm.")
+    push("- **B skating 1/2/3** — element-value, weights MUST=1, SHOULD=2, MAY=3; pass=1.0 warn=0.5 fail=0.0; skip/error excluded.")
+    push("- **C skating 1/2/4** — element-value, weights MUST=1, SHOULD=2, MAY=4; same execution multiplier as B.")
+    push("- **D compliance / extras** — two numbers per tool. Compliance = MUST + SHOULD ratio (skip/error excluded). Extras = MAY pass rate against the full MAY menu (skips in denominator).")
+    push("- **E skating + MAY→skip** — same weights as B, but MAY-warn results are reclassified as skip (excluded from numerator AND denominator).")
+    push("- **F weighted 85/15** — single-number blend of D: `compliance × 0.85 + extras × 0.15`.")
+    push("- **G ceiling 1/2/3** — same weights as B but `skip` is counted in the denominator (spec ceiling). True skating model: a tool that didn't attempt a check pays for the absence.")
+    push("- **H ceiling + MAY→skip** — G with MAY-warn reclassified as skip. Tests whether shifting MAY-warns to skips meaningfully changes outcomes when the denominator already counts skips.")
+    push("")
+
+    push("## Eligibility counts")
+    push("")
+    e75 = threshold_eligibility(df, 75)
+    e80 = threshold_eligibility(df, 80)
+    push("| Threshold | A | B | C | D both | D comp | E | F | G | H |")
+    push("| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |")
+    push(f"| ≥ 75 | {e75['A_current']} | {e75['B_skating_1_2_3']} | {e75['C_skating_1_2_4']} | {e75['D_both']} | {e75['D_compliance']} | {e75['E_skating_may_skip']} | {e75['F_weighted_85_15']} | {e75['G_ceiling_1_2_3']} | {e75['H_ceiling_may_skip']} |")
+    push(f"| ≥ 80 | {e80['A_current']} | {e80['B_skating_1_2_3']} | {e80['C_skating_1_2_4']} | {e80['D_both']} | {e80['D_compliance']} | {e80['E_skating_may_skip']} | {e80['F_weighted_85_15']} | {e80['G_ceiling_1_2_3']} | {e80['H_ceiling_may_skip']} |")
+    push("")
+
+    push("## Distribution by score bucket")
+    push("")
+    cols_for_dist = [
+        ("A_current", "A"),
+        ("B_skating_1_2_3", "B"),
+        ("C_skating_1_2_4", "C"),
+        ("D_compliance", "D-comp"),
+        ("D_extras", "D-ext"),
+        ("E_skating_may_skip", "E"),
+        ("F_weighted_85_15", "F"),
+        ("G_ceiling_1_2_3", "G"),
+        ("H_ceiling_may_skip", "H"),
+    ]
+    header = "| Bucket | " + " | ".join(label for _, label in cols_for_dist) + " |"
+    push(header)
+    push("| --- | " + " | ".join("---:" for _ in cols_for_dist) + " |")
+    bucket_labels = ["90–100", "80–89", "70–79", "60–69", "50–59", "0–49"]
+    bucket_data = {col: bucket_distribution(df, col) for col, _ in cols_for_dist}
+    for i, label in enumerate(bucket_labels):
+        row = "| " + label + " | " + " | ".join(str(bucket_data[col][i]) for col, _ in cols_for_dist) + " |"
+        push(row)
+    push("")
+
+    df_ranked = add_ranks(df)
+
+    # Sort leaderboard by G (true skating ceiling model) rather than B.
+    df_ranked = df_ranked.sort("G_ceiling_1_2_3", descending=True)
+
+    push("## Per-tool leaderboard (sorted by config G — true skating ceiling)")
+    push("")
+    push("| # | Slug | Version | M/S/m | A | B | C | D comp/ext | E | F | G | H | Δ rank A→G |")
+    push("| ---: | --- | --- | :---: | ---: | ---: | ---: | :---: | ---: | ---: | ---: | ---: | :---: |")
+    df_ranked = df_ranked.with_columns(
+        pl.col("G_ceiling_1_2_3").rank("min", descending=True).cast(pl.Int64).alias("G_rank"),
+    ).with_columns(
+        (pl.col("A_rank") - pl.col("G_rank")).alias("rank_delta_A_to_G"),
+    )
+    for i, row in enumerate(df_ranked.iter_rows(named=True), start=1):
+        delta = row["rank_delta_A_to_G"]
+        arrow = f"▲{delta}" if delta > 0 else (f"▼{-delta}" if delta < 0 else "–")
+        push(
+            f"| {i} | {row['slug']} | v{row['version']} | "
+            f"{row['n_must']}/{row['n_should']}/{row['n_may']} | "
+            f"{row['A_current']} | {row['B_skating_1_2_3']} | {row['C_skating_1_2_4']} | "
+            f"{row['D_compliance']} / {row['D_extras']} | "
+            f"{row['E_skating_may_skip']} | {row['F_weighted_85_15']} | "
+            f"{row['G_ceiling_1_2_3']} | {row['H_ceiling_may_skip']} | {arrow} |"
+        )
+    push("")
+
+    push("## Biggest A→B rank movers")
+    push("")
+    movers = df_ranked.sort("rank_delta_A_to_B", descending=True)
+    push("### Climbers (rank ↑ going from A to B)")
+    push("")
+    push("| Slug | A rank | B rank | A% | B% | Δ rank |")
+    push("| --- | ---: | ---: | ---: | ---: | :---: |")
+    for row in movers.head(15).iter_rows(named=True):
+        if row["rank_delta_A_to_B"] <= 0:
+            continue
+        push(
+            f"| {row['slug']} | {row['A_rank']} | {row['B_rank']} | "
+            f"{row['A_current']} | {row['B_skating_1_2_3']} | ▲{row['rank_delta_A_to_B']} |"
+        )
+    push("")
+    push("### Fallers (rank ↓ going from A to B)")
+    push("")
+    push("| Slug | A rank | B rank | A% | B% | Δ rank |")
+    push("| --- | ---: | ---: | ---: | ---: | :---: |")
+    for row in movers.tail(15).iter_rows(named=True):
+        if row["rank_delta_A_to_B"] >= 0:
+            continue
+        push(
+            f"| {row['slug']} | {row['A_rank']} | {row['B_rank']} | "
+            f"{row['A_current']} | {row['B_skating_1_2_3']} | ▼{-row['rank_delta_A_to_B']} |"
+        )
+    push("")
+
+    return "\n".join(lines)
+
+
+def main() -> int:
+    if not COVERAGE.exists():
+        print(f"error: missing {COVERAGE}", file=sys.stderr)
+        return 1
+    tiers = load_tier_lookup()
+    cards = load_latest_scorecards()
+    if not cards:
+        print("error: no scorecards found", file=sys.stderr)
+        return 1
+    OUT_DIR.mkdir(parents=True, exist_ok=True)
+    long_path = OUT_DIR / "long.parquet"
+    tools_path = OUT_DIR / "tools.csv"
+    report_path = OUT_DIR / "report.md"
+
+    long = build_long_frame(cards, tiers)
+    long.write_parquet(long_path)
+
+    df = compute_tool_scores(long)
+    df.write_csv(tools_path)
+
+    md = render_markdown(df)
+    report_path.write_text(md)
+
+    # Echo the markdown report to stdout so a `bash` invocation captures it.
+    print(md)
+    print(
+        f"\n---\nlong-form dataframe: {long_path.relative_to(REPO)}  ({long.height} rows)",
+        file=sys.stderr,
+    )
+    print(
+        f"per-tool table:      {tools_path.relative_to(REPO)}  ({df.height} rows)",
+        file=sys.stderr,
+    )
+    print(
+        f"markdown report:     {report_path.relative_to(REPO)}",
+        file=sys.stderr,
+    )
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/scripts/smoke-api-score.sh b/scripts/smoke-api-score.sh
new file mode 100755
index 0000000..0c74306
--- /dev/null
+++ b/scripts/smoke-api-score.sh
@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+# Post-deploy smoke for the live-scoring Worker. Exits 0 when /api/score for
+# a curated slug returns the response triad; exits non-zero otherwise.
+#
+# Invoked from .github/workflows/deploy.yml after a successful wrangler
+# deploy, and runnable locally for parity. Exercises the registry-fast-path
+# only: gate behaviour and live-sandbox dispatch are covered by unit tests
+# and the opt-in homepage-score-live e2e suite. Rationale lives in
+# RELEASES-RATIONALE.md § Post-deploy smoke scope.
+#
+# Usage:
+#   scripts/smoke-api-score.sh <base-url>
+#
+# Environment variables (all optional):
+#   CF_ACCESS_CLIENT_ID      Sent as CF-Access-Client-Id when non-empty.
+#   CF_ACCESS_CLIENT_SECRET  Sent as CF-Access-Client-Secret when non-empty.
+#                            Both come from repo secrets in GH Actions; they
+#                            are required for staging (Worker is behind
+#                            Cloudflare Access) and unused for production
+#                            (anc.dev is public).
+#   TURNSTILE_TOKEN          Defaults to "x". The literal "x" succeeds only
+#                            against the CF always-passes test secret used
+#                            on staging. Production needs a real strategy.
+#   SMOKE_SLEEP_SEC          Edge-propagation delay before the POST.
+#                            Default 10. Tune up if regional latency starts
+#                            producing intermittent 404s.
+#   SLUG                     Curated slug to score. Default "ripgrep".
+#                            Must be present in registry.yaml.
+#
+# Exit codes:
+#   0  smoke passed
+#   1  smoke failed (assertion mismatch or non-200 from /api/score)
+#   2  prerequisite missing (no base URL, no jq)
+
+set -euo pipefail
+
+BASE_URL="${1:-}"
+if [ -z "$BASE_URL" ]; then
+  echo "FATAL: missing base URL. Usage: $0 <base-url>" >&2
+  exit 2
+fi
+
+JQ_BIN="$(command -v jaq || command -v jq || true)"
+if [ -z "$JQ_BIN" ]; then
+  echo "FATAL: neither jaq nor jq is installed. Install one (brew install jaq) and retry." >&2
+  exit 2
+fi
+
+SLEEP_SEC="${SMOKE_SLEEP_SEC:-10}"
+SLUG="${SLUG:-ripgrep}"
+TURNSTILE_TOKEN="${TURNSTILE_TOKEN:-x}"
+
+ACCESS_HEADERS=()
+if [ -n "${CF_ACCESS_CLIENT_ID:-}" ] && [ -n "${CF_ACCESS_CLIENT_SECRET:-}" ]; then
+  ACCESS_HEADERS+=(-H "CF-Access-Client-Id: ${CF_ACCESS_CLIENT_ID}")
+  ACCESS_HEADERS+=(-H "CF-Access-Client-Secret: ${CF_ACCESS_CLIENT_SECRET}")
+fi
+
+if [ "$SLEEP_SEC" -gt 0 ]; then
+  echo "Waiting ${SLEEP_SEC}s for edge propagation..."
+  sleep "$SLEEP_SEC"
+fi
+
+echo "POST ${BASE_URL}/api/score (slug=${SLUG})"
+response="$(curl --silent --show-error --fail-with-body \
+  --max-time 30 \
+  "${ACCESS_HEADERS[@]}" \
+  -H "Content-Type: application/json" \
+  -d "{\"input\":\"${SLUG}\",\"turnstile_token\":\"${TURNSTILE_TOKEN}\"}" \
+  "${BASE_URL}/api/score")"
+
+echo "::group::smoke response"
+echo "${response}" | "$JQ_BIN" .
+echo "::endgroup::"
+
+# Contract: scorecard.kind === "registry_hit" plus four-field response triad.
+# Missing any field is a deploy-stop signal.
+if ! echo "${response}" | "$JQ_BIN" --exit-status '
+    .scorecard.kind == "registry_hit"
+    and (.spec_version | type) == "string"
+    and (.site_spec_version | type) == "string"
+    and (.anc_version | type) == "string"
+    and (.checker_url | type) == "string"
+  ' > /dev/null; then
+  echo "FATAL: /api/score response missing required fields for ${SLUG}" >&2
+  exit 1
+fi
+
+echo "[pass] /api/score returned registry_hit with full response triad"
diff --git a/scripts/staging-cache-smoke.sh b/scripts/staging-cache-smoke.sh
new file mode 100755
index 0000000..27ab3be
--- /dev/null
+++ b/scripts/staging-cache-smoke.sh
@@ -0,0 +1,394 @@
+#!/usr/bin/env bash
+# staging-cache-smoke.sh — opt-in live cache smoke test for /api/score on staging.
+#
+# Plan U7 verification. NOT in the default test pipeline (bun test). Run on
+# demand when you need confidence that the live staging cache tier is
+# behaving as designed, or after any change to handler.ts / cache.ts / do.ts
+# that touches the lookupScorecard or post-success cache-write path.
+#
+# Two modes:
+#
+#   ./scripts/staging-cache-smoke.sh
+#       Warm + edge tests only. No sandbox spawns. Safe to run repeatedly.
+#       Asserts validation gates, Turnstile semantics, method gate, curated
+#       registry hit unmetered, and cache READS for binaries previously
+#       written (cowsay is the canonical fixture, see HOW THE CACHE GETS
+#       SEEDED below).
+#
+#   ./scripts/staging-cache-smoke.sh --cold
+#       Adds three cold sandbox spawns. Runs cold-POST then warm-POST for
+#       each of: `pip install black`, `cargo binstall ouch`, and the
+#       hint-mapped github-url `https://github.com/Aider-AI/aider`.
+#       Asserts cache WRITES (R2 object lands at the canonical key) AND
+#       READS (second request hits the cache, sub-2s, same scorecard
+#       payload). Each cold spawn burns ~5-20 s of staging container time;
+#       use sparingly.
+#
+# HOW THE CACHE GETS SEEDED: U7 writes to SCORE_CACHE on every successful
+# live score, so any prior --cold run (or production-style traffic from
+# the homepage form once U8 ships) seeds the cache. The warm-mode tests
+# assume `cowsay` is already cached — the very first U7 verification on
+# 2026-05-19 wrote it. If it ages out via the 7-day R2 lifecycle, run
+# `./scripts/staging-cache-smoke.sh --cold` to reseed.
+#
+# Turnstile bypass: staging's TURNSTILE_SECRET is bound to the Cloudflare
+# always-passes test secret, so all POSTs in this script pass
+# `turnstile_token: "x"`. See
+# docs/solutions/tooling-decisions/cloudflare-staging-turnstile-test-secret-2026-05-19.md
+# for the full pattern.
+#
+# Cloudflare Access (added 2026-05-19): the staging Worker URL is now
+# gated by a CF Access Self-Hosted Application. CLI clients must send
+# CF-Access-Client-Id + CF-Access-Client-Secret headers from a service
+# token. This script reads them from 1Password by item title:
+#   "Cloudflare Access Service Token - agentnative-site-staging"
+# A missing service-token item OR a missing op CLI surfaces as an
+# instant 302 redirect to `*.cloudflareaccess.com` on every request,
+# which the harness reports as a clear FAIL rather than a confusing
+# protocol-level error.
+#
+# Dependencies: curl, jaq (preferred) or jq, wrangler (bun x wrangler), date (GNU or BSD), op (1Password CLI).
+
+set -u
+
+STAGING_URL="${STAGING_URL:-https://agentnative-site-staging.brettdavies.workers.dev}"
+STAGING_BUCKET="${STAGING_BUCKET:-anc-score-cache-staging}"
+COLD=false
+[ "${1:-}" = "--cold" ] && COLD=true
+
+# Currently 0.4.0 — keep in lockstep with src/worker/spec-version.gen.ts.
+SPEC_VERSION="${SPEC_VERSION:-0.4.0}"
+
+# Prefer jaq (faster, drop-in jq replacement). Fall back to jq.
+JQ_BIN="$(command -v jaq || command -v jq || true)"
+if [ -z "$JQ_BIN" ]; then
+  echo "FATAL: neither jaq nor jq is installed. Install one (brew install jaq) and retry." >&2
+  exit 2
+fi
+
+# Fetch CF Access service token credentials from 1Password. The values
+# never enter the script's logged output; they live in shell variables
+# scoped to this process and are passed to curl via -H. The 1Password
+# helper script picks up the operator's default vault.
+OP_ITEM="Cloudflare Access Service Token - agentnative-site-staging"
+OP_READ="${OP_READ:-$HOME/.claude/skills/1password/scripts/read_field.sh}"
+if [ ! -x "$OP_READ" ]; then
+  echo "FATAL: 1Password helper not found at $OP_READ. Export OP_READ to point at it, or install the 1password skill." >&2
+  exit 2
+fi
+CF_ACCESS_CLIENT_ID="$("$OP_READ" "$OP_ITEM" client_id 2>/dev/null || true)"
+CF_ACCESS_CLIENT_SECRET="$("$OP_READ" "$OP_ITEM" client_secret 2>/dev/null || true)"
+if [ -z "$CF_ACCESS_CLIENT_ID" ] || [ -z "$CF_ACCESS_CLIENT_SECRET" ]; then
+  echo "FATAL: could not read CF Access service token from 1Password item '$OP_ITEM'." >&2
+  echo "       Verify the item exists in 1Password with fields 'client_id' and 'client_secret'." >&2
+  echo "       Then re-run. Without these credentials every staging request returns 302 to *.cloudflareaccess.com." >&2
+  exit 2
+fi
+
+# Curl helper that always carries the CF Access service-token headers.
+# All HTTP calls below go through these so the Access boundary is
+# transparent to the test logic.
+ACCESS_HEADERS=(
+  -H "CF-Access-Client-Id: $CF_ACCESS_CLIENT_ID"
+  -H "CF-Access-Client-Secret: $CF_ACCESS_CLIENT_SECRET"
+)
+
+PASS=0
+FAIL=0
+FAIL_LABELS=()
+
+ok() {
+  printf '  [pass] %s\n' "$1"
+  PASS=$((PASS + 1))
+}
+
+ko() {
+  printf '  [FAIL] %s — %s\n' "$1" "$2"
+  FAIL=$((FAIL + 1))
+  FAIL_LABELS+=("$1")
+}
+
+# Millisecond clock (Linux + macOS).
+now_ms() {
+  if date +%s%N >/dev/null 2>&1 && [ "$(date +%N)" != "N" ]; then
+    echo $(($(date +%s%N) / 1000000))
+  else
+    # macOS without coreutils — fall back to perl.
+    perl -MTime::HiRes=time -E 'say int(time() * 1000)'
+  fi
+}
+
+# expect_status_post LABEL BODY EXPECTED_STATUS [QUERY_STRING]
+expect_status_post() {
+  local label=$1 body=$2 expected=$3 query=${4:-}
+  local tmp
+  tmp=$(mktemp)
+  local code
+  code=$(curl -s -o "$tmp" -w '%{http_code}' "${ACCESS_HEADERS[@]}" \
+    -X POST -H 'content-type: application/json' \
+    "$STAGING_URL/api/score$query" \
+    --data "$body")
+  if [ "$code" = "$expected" ]; then
+    ok "$label (status=$code)"
+  else
+    ko "$label" "expected $expected, got $code: $(head -c 200 "$tmp")"
+  fi
+  rm -f "$tmp"
+}
+
+# expect_error_code LABEL BODY EXPECTED_HTTP_STATUS EXPECTED_ERROR_CODE [QUERY]
+expect_error_code() {
+  local label=$1 body=$2 expected_status=$3 expected_code=$4 query=${5:-}
+  local tmp
+  tmp=$(mktemp)
+  local code
+  code=$(curl -s -o "$tmp" -w '%{http_code}' "${ACCESS_HEADERS[@]}" \
+    -X POST -H 'content-type: application/json' \
+    "$STAGING_URL/api/score$query" \
+    --data "$body")
+  local body_code
+  body_code=$("$JQ_BIN" -r '.error.code // "<no error.code>"' <"$tmp" 2>/dev/null || echo "<parse failed>")
+  if [ "$code" = "$expected_status" ] && [ "$body_code" = "$expected_code" ]; then
+    ok "$label (status=$code, error.code=$body_code)"
+  else
+    ko "$label" "expected ${expected_status}/${expected_code}, got ${code}/${body_code}"
+  fi
+  rm -f "$tmp"
+}
+
+# expect_status_method LABEL METHOD EXPECTED_STATUS
+expect_status_method() {
+  local label=$1 method=$2 expected=$3
+  local code
+  code=$(curl -s -o /dev/null -w '%{http_code}' "${ACCESS_HEADERS[@]}" -X "$method" "$STAGING_URL/api/score")
+  if [ "$code" = "$expected" ]; then
+    ok "$label (method=$method, status=$code)"
+  else
+    ko "$label" "expected $expected, got $code"
+  fi
+}
+
+# expect_warm_hit LABEL BODY MAX_MS — POST and assert sub-MAX_MS round-trip
+# AND scorecard.kind != 'registry_hit' (live or cache-hit, not curated).
+expect_warm_hit() {
+  local label=$1 body=$2 max_ms=$3
+  local tmp
+  tmp=$(mktemp)
+  local start_ms end_ms duration code
+  start_ms=$(now_ms)
+  code=$(curl -s -o "$tmp" -w '%{http_code}' "${ACCESS_HEADERS[@]}" \
+    -X POST -H 'content-type: application/json' \
+    "$STAGING_URL/api/score" --data "$body")
+  end_ms=$(now_ms)
+  duration=$((end_ms - start_ms))
+  if [ "$code" != "200" ]; then
+    ko "$label" "expected 200, got $code: $(head -c 200 "$tmp")"
+    rm -f "$tmp"
+    return
+  fi
+  if [ "$duration" -gt "$max_ms" ]; then
+    ko "$label" "expected <${max_ms} ms (cache hit), got ${duration} ms — cache may be cold"
+    rm -f "$tmp"
+    return
+  fi
+  ok "$label (status=200, duration=${duration} ms < ${max_ms} ms — cache hit)"
+  rm -f "$tmp"
+}
+
+# expect_cold_then_warm LABEL_PREFIX BODY EXPECTED_BINARY
+expect_cold_then_warm() {
+  local label_prefix=$1 body=$2 binary=$3
+  local tmp_cold tmp_warm
+  tmp_cold=$(mktemp)
+  tmp_warm=$(mktemp)
+
+  # COLD
+  local start_ms end_ms duration code
+  start_ms=$(now_ms)
+  code=$(curl -s -o "$tmp_cold" -w '%{http_code}' --max-time 90 "${ACCESS_HEADERS[@]}" \
+    -X POST -H 'content-type: application/json' \
+    "$STAGING_URL/api/score" --data "$body")
+  end_ms=$(now_ms)
+  duration=$((end_ms - start_ms))
+  if [ "$code" != "200" ]; then
+    ko "$label_prefix cold" "expected 200, got $code: $(head -c 200 "$tmp_cold")"
+    rm -f "$tmp_cold" "$tmp_warm"
+    return
+  fi
+  ok "$label_prefix cold (status=200, duration=${duration} ms — sandbox spawn)"
+
+  # Verify R2 object lands at the canonical key.
+  local key="scores/${binary}/${SPEC_VERSION}.json"
+  if bun x wrangler r2 object get "${STAGING_BUCKET}/${key}" --file /tmp/r2-probe.json --remote >/dev/null 2>&1; then
+    local payload_keys
+    payload_keys=$("$JQ_BIN" -r 'keys | join(",")' </tmp/r2-probe.json 2>/dev/null || echo "")
+    if echo "$payload_keys" | grep -q "spec_version" && echo "$payload_keys" | grep -q "anc_version" && echo "$payload_keys" | grep -q "tool_version"; then
+      ok "$label_prefix R2 wrote $key with full payload shape"
+    else
+      ko "$label_prefix R2 write" "payload shape missing required fields (got: $payload_keys)"
+    fi
+  else
+    ko "$label_prefix R2 write" "object not found at $key after cold run"
+  fi
+
+  # WARM
+  start_ms=$(now_ms)
+  code=$(curl -s -o "$tmp_warm" -w '%{http_code}' "${ACCESS_HEADERS[@]}" \
+    -X POST -H 'content-type: application/json' \
+    "$STAGING_URL/api/score" --data "$body")
+  end_ms=$(now_ms)
+  duration=$((end_ms - start_ms))
+  if [ "$code" != "200" ]; then
+    ko "$label_prefix warm" "expected 200, got $code"
+    rm -f "$tmp_cold" "$tmp_warm"
+    return
+  fi
+  if [ "$duration" -gt 2000 ]; then
+    ko "$label_prefix warm" "expected <2000 ms (cache hit), got ${duration} ms"
+    rm -f "$tmp_cold" "$tmp_warm"
+    return
+  fi
+  ok "$label_prefix warm (status=200, duration=${duration} ms — cache hit)"
+
+  # Cold and warm scorecards must be byte-identical (cache returns what we wrote).
+  if diff <("$JQ_BIN" -S '.scorecard' <"$tmp_cold") <("$JQ_BIN" -S '.scorecard' <"$tmp_warm") >/dev/null 2>&1; then
+    ok "$label_prefix scorecard equality (cold == warm)"
+  else
+    ko "$label_prefix scorecard equality" "cold and warm scorecards differ"
+  fi
+  rm -f "$tmp_cold" "$tmp_warm"
+}
+
+printf '\n=== staging-cache-smoke @ %s ===\n' "$STAGING_URL"
+printf '    SPEC_VERSION=%s  COLD=%s\n\n' "$SPEC_VERSION" "$COLD"
+
+# -----------------------------------------------------------------------------
+# Group Z — CF Access boundary (must run FIRST so a lifted Access app
+# surfaces here rather than silently letting the rest of the suite
+# "pass" via the service-token bypass)
+# -----------------------------------------------------------------------------
+#
+# Without the ACCESS_HEADERS, an unauth request to the staging Worker
+# must be intercepted by Cloudflare Access and redirected to the
+# account's *.cloudflareaccess.com login flow. If we instead see a 200
+# or a 4xx from the Worker, the Access app has been disabled or its
+# policies wiped, AND the rest of the suite would falsely "pass"
+# (because every other request carries the service-token headers).
+# This probe catches the boundary getting silently lifted.
+printf '[Z] CF Access boundary\n'
+ZUNAUTH_STATUS=$(curl -s -o /dev/null -w '%{http_code}' \
+  "$STAGING_URL/api/score?input=ripgrep")
+ZUNAUTH_LOC=$(curl -s -o /dev/null -w '%{redirect_url}' \
+  "$STAGING_URL/api/score?input=ripgrep")
+if [ "$ZUNAUTH_STATUS" = "302" ] && echo "$ZUNAUTH_LOC" | grep -q 'cloudflareaccess.com'; then
+  ok "Z01 unauth request → 302 to *.cloudflareaccess.com (boundary enforced)"
+else
+  ko "Z01 unauth boundary" "expected 302 to *.cloudflareaccess.com; got status=$ZUNAUTH_STATUS location=${ZUNAUTH_LOC:-<empty>}"
+fi
+
+# -----------------------------------------------------------------------------
+# Group A — input validation (warm; no sandbox)
+# -----------------------------------------------------------------------------
+printf '\n[A] input validation\n'
+expect_error_code "A01 empty input"            '{"input":"","turnstile_token":"x"}'                                         400 unrecognized_input
+expect_status_post "A02 malformed JSON body"   'not json'                                                                    400
+expect_error_code "A03 non-https URL"          '{"input":"http://github.com/foo/bar","turnstile_token":"x"}'                400 non_https_url
+expect_error_code "A04 non-github host"        '{"input":"https://example.com/foo/bar","turnstile_token":"x"}'              400 non_github_host
+expect_error_code "A05 branch path URL"        '{"input":"https://github.com/foo/bar/tree/main","turnstile_token":"x"}'     400 invalid_url_path
+
+# -----------------------------------------------------------------------------
+# Group B — method gate (warm; no sandbox)
+# -----------------------------------------------------------------------------
+printf '\n[B] method gate\n'
+expect_status_method "B01 DELETE → 405" DELETE 405
+expect_status_method "B02 PUT → 405"    PUT    405
+
+# -----------------------------------------------------------------------------
+# Group C — Turnstile semantics (warm; no sandbox)
+# -----------------------------------------------------------------------------
+# Empty/missing tokens are rejected by the Worker BEFORE siteverify is called
+# (the "missing_token" check fires first). The CF test secret only matters
+# AFTER a non-empty token reaches siteverify.
+printf '\n[C] Turnstile semantics\n'
+expect_error_code "C01 empty turnstile_token"     '{"input":"https://github.com/foo/bar","turnstile_token":""}'  400 turnstile_failed
+expect_error_code "C02 missing turnstile_token"   '{"input":"https://github.com/foo/bar"}'                       400 turnstile_failed
+
+# Curated registry hit (slug=ripgrep) is unmetered — bypasses Turnstile entirely.
+# Should return 200 with ANY token, including empty or missing.
+expect_status_post "C03 curated slug with token=x" '{"input":"ripgrep","turnstile_token":"x"}' 200
+expect_status_post "C04 curated slug with empty token (unmetered bypass)" '{"input":"ripgrep","turnstile_token":""}' 200
+expect_status_post "C05 curated slug without token field"                  '{"input":"ripgrep"}'                       200
+
+# -----------------------------------------------------------------------------
+# Group D — registry/cache read tier (warm; no sandbox)
+# -----------------------------------------------------------------------------
+printf '\n[D] read tiers\n'
+expect_warm_hit "D01 POST cowsay (cached from prior run)" '{"input":"npm install -g cowsay","turnstile_token":"x"}' 2000
+
+# GET path: cache tier also honored on GET per U7 (read-only contract extended).
+GET_LATENCY=$({
+  start_ms=$(now_ms)
+  curl -s -o /dev/null "${ACCESS_HEADERS[@]}" "$STAGING_URL/api/score?input=npm%20install%20-g%20cowsay"
+  end_ms=$(now_ms)
+  echo $((end_ms - start_ms))
+})
+GET_STATUS=$(curl -s -o /dev/null -w '%{http_code}' "${ACCESS_HEADERS[@]}" "$STAGING_URL/api/score?input=npm%20install%20-g%20cowsay")
+if [ "$GET_STATUS" = "200" ] && [ "$GET_LATENCY" -lt 2000 ]; then
+  ok "D02 GET cowsay → 200 cache-hit ($GET_LATENCY ms)"
+else
+  ko "D02 GET cowsay" "status=$GET_STATUS, latency=$GET_LATENCY ms"
+fi
+
+# GET on an uncached non-registry github-url → 404 chain_no_resolve.
+# GET is registry + cache tier only (read-only contract). The cache tier
+# can't help here because there's no derivable binary upfront.
+GET_404_STATUS=$(curl -s -o /tmp/d03 -w '%{http_code}' "${ACCESS_HEADERS[@]}" "$STAGING_URL/api/score?input=https%3A%2F%2Fgithub.com%2Ftotally%2Funknown-tool-12345")
+GET_404_CODE=$("$JQ_BIN" -r '.error.code // ""' </tmp/d03 2>/dev/null)
+if [ "$GET_404_STATUS" = "404" ] && [ "$GET_404_CODE" = "chain_no_resolve" ]; then
+  ok "D03 GET unknown github → 404 chain_no_resolve"
+else
+  ko "D03 GET unknown github" "status=$GET_404_STATUS, error.code=$GET_404_CODE"
+fi
+rm -f /tmp/d03
+
+# -----------------------------------------------------------------------------
+# Group E — cold sandbox spawns (only with --cold; 3 sandbox runs)
+# -----------------------------------------------------------------------------
+if [ "$COLD" = true ]; then
+  printf '\n[E] cold sandbox spawns (3 cold + 3 warm)\n'
+
+  expect_cold_then_warm "E01 pip install black"   '{"input":"pip install black","turnstile_token":"x"}'     black
+  expect_cold_then_warm "E02 cargo binstall ouch" '{"input":"cargo binstall ouch","turnstile_token":"x"}'   ouch
+  expect_cold_then_warm "E03 github.com/Aider-AI/aider (hint→pip aider-chat)" '{"input":"https://github.com/Aider-AI/aider","turnstile_token":"x"}' aider
+
+  # E04 — ?fromCache=false bypass on a cached entry. Live re-spawn forced
+  # even though cowsay is cached. The cache write still fires (overwriting
+  # the existing entry with a freshly-scored copy).
+  printf '  exercising ?fromCache=false bypass on cowsay (1 sandbox spawn)\n'
+  start_ms=$(now_ms)
+  code=$(curl -s -o /tmp/e04 -w '%{http_code}' --max-time 90 "${ACCESS_HEADERS[@]}" \
+    -X POST -H 'content-type: application/json' \
+    "$STAGING_URL/api/score?fromCache=false" \
+    --data '{"input":"npm install -g cowsay","turnstile_token":"x"}')
+  end_ms=$(now_ms)
+  duration=$((end_ms - start_ms))
+  if [ "$code" = "200" ] && [ "$duration" -gt 1500 ]; then
+    ok "E04 ?fromCache=false on cowsay (status=200, duration=${duration} ms — live re-spawn)"
+  else
+    ko "E04 ?fromCache=false" "status=$code, duration=${duration} ms (expected 200 + >1500 ms)"
+  fi
+  rm -f /tmp/e04
+else
+  printf '\n[E] cold sandbox spawns: SKIPPED (pass --cold to enable)\n'
+fi
+
+# -----------------------------------------------------------------------------
+# Summary
+# -----------------------------------------------------------------------------
+printf '\n=== summary: %d passed, %d failed ===\n' "$PASS" "$FAIL"
+if [ "$FAIL" -gt 0 ]; then
+  printf 'failed tests:\n'
+  for label in "${FAIL_LABELS[@]}"; do printf '  - %s\n' "$label"; done
+  exit 1
+fi
+exit 0
diff --git a/src/build/00-spec-version-gen.mjs b/src/build/00-spec-version-gen.mjs
new file mode 100644
index 0000000..3ead95f
--- /dev/null
+++ b/src/build/00-spec-version-gen.mjs
@@ -0,0 +1,103 @@
+// Build-time emitter for `src/worker/spec-version.gen.ts`.
+//
+// Reads `src/data/spec/VERSION` (the vendored spec channel — the standard
+// the Worker scores against) and `content/principles/VERSION` (this site's
+// principle copy — may lag the spec briefly during a release cycle), and
+// writes a TS module the Worker imports at build time. The emitted file is
+// the single source of truth for `SPEC_VERSION`, `SITE_SPEC_VERSION`, and
+// `CHECKER_URL`; `response-shape.ts` consumes it for every response triad.
+//
+// Two-file split is load-bearing: spec VERSION and site-principles VERSION
+// are released independently. A spec bump can ship before the site copy
+// updates; rendering both makes the lag observable on every response.
+//
+// Run via `build()` in build.mjs before any worker-bundle step. The drift
+// test in tests/spec-version-gen.test.ts re-runs this emitter in memory
+// and asserts the committed `.gen.ts` matches — so an out-of-date generated
+// file fails CI instead of silently shipping a stale triad.
+
+import { readFile, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const REPO_ROOT = join(fileURLToPath(import.meta.url), '..', '..', '..');
+const SPEC_VERSION_PATH = join(REPO_ROOT, 'src', 'data', 'spec', 'VERSION');
+const SITE_VERSION_PATH = join(REPO_ROOT, 'content', 'principles', 'VERSION');
+const GEN_PATH = join(REPO_ROOT, 'src', 'worker', 'spec-version.gen.ts');
+
+// CHECKER_URL is intentionally not a file: anc.dev is the only live-scoring
+// surface. If a future fork wants to point at a different host, override
+// here. Not a token-fetched value because we want the literal embedded in
+// the bundle, not a runtime lookup.
+const CHECKER_URL = 'https://anc.dev/score';
+
+/**
+ * Read a VERSION file and strip trailing newlines. Throws if the file
+ * is missing or empty — these constants must NEVER ship as empty strings,
+ * since `response-shape.ts` writes them into every `/api/score` response.
+ */
+async function readVersion(path) {
+  const raw = await readFile(path, 'utf8');
+  const trimmed = raw.trim();
+  if (!trimmed) throw new Error(`spec-version-gen: ${path} is empty`);
+  if (!/^\d+\.\d+\.\d+/.test(trimmed)) {
+    throw new Error(`spec-version-gen: ${path} does not look like semver (got "${trimmed}")`);
+  }
+  return trimmed;
+}
+
+/**
+ * Build the file content. Pure — takes the resolved versions and returns
+ * the bytes that should land at `src/worker/spec-version.gen.ts`. Exposed
+ * so the drift test can compare the on-disk file against a fresh
+ * re-computation without writing anything.
+ */
+export function renderSpecVersionModule({ specVersion, siteSpecVersion, checkerUrl }) {
+  return `// GENERATED by src/build/00-spec-version-gen.mjs — do NOT edit.
+// Re-run \`bun run build\` to regenerate. The drift check in
+// tests/spec-version-gen.test.ts fails CI if this file is out of date.
+//
+// SPEC_VERSION       — from src/data/spec/VERSION (the standard the
+//                      Worker scores against).
+// SITE_SPEC_VERSION  — from content/principles/VERSION (the principle
+//                      copy this site renders).
+// CHECKER_URL        — production live-scoring surface; moves with anc.dev.
+
+export const SPEC_VERSION = '${specVersion}';
+export const SITE_SPEC_VERSION = '${siteSpecVersion}';
+export const CHECKER_URL = '${checkerUrl}';
+`;
+}
+
+/**
+ * Run the emitter. Returns the generated content + the resolved versions
+ * so callers (build.mjs, the drift test) can assert on either.
+ */
+export async function generateSpecVersionModule() {
+  const specVersion = await readVersion(SPEC_VERSION_PATH);
+  const siteSpecVersion = await readVersion(SITE_VERSION_PATH);
+  const content = renderSpecVersionModule({ specVersion, siteSpecVersion, checkerUrl: CHECKER_URL });
+  await writeFile(GEN_PATH, content);
+  return { specVersion, siteSpecVersion, checkerUrl: CHECKER_URL, content, path: GEN_PATH };
+}
+
+/**
+ * Pure variant for the drift test — computes what the file SHOULD say
+ * without writing it. The test reads the on-disk file and compares.
+ */
+export async function computeExpectedSpecVersionModule() {
+  const specVersion = await readVersion(SPEC_VERSION_PATH);
+  const siteSpecVersion = await readVersion(SITE_VERSION_PATH);
+  return {
+    specVersion,
+    siteSpecVersion,
+    checkerUrl: CHECKER_URL,
+    content: renderSpecVersionModule({ specVersion, siteSpecVersion, checkerUrl: CHECKER_URL }),
+    path: GEN_PATH,
+  };
+}
+
+if (import.meta.main) {
+  const { specVersion, siteSpecVersion, path } = await generateSpecVersionModule();
+  console.log(`spec-version-gen: wrote ${path} (spec=${specVersion}, site=${siteSpecVersion})`);
+}
diff --git a/src/build/assets.mjs b/src/build/01-assets.mjs
similarity index 91%
rename from src/build/assets.mjs
rename to src/build/01-assets.mjs
index 56b8d0f..e6ba625 100644
--- a/src/build/assets.mjs
+++ b/src/build/01-assets.mjs
@@ -84,8 +84,12 @@ export async function copyAssets({ repoRoot, distDir }) {
     join(repoRoot, 'src/client/leaderboard.ts'),
     join(distDir, 'js/leaderboard.js'),
   );
+  // Homepage live-scoring form (Turnstile lazy-load + 2 s theater +
+  // redirect to /live-score/<binary>). Loaded with defer from the
+  // homepage shell only.
+  const liveScoreJs = await bundleClient(join(repoRoot, 'src/client/live-score.ts'), join(distDir, 'js/live-score.js'));
   // theme-init is inlined into every HTML head — no file emitted.
   const themeInit = await bundleClient(join(repoRoot, 'src/client/theme-init.ts'));
 
-  return { themeInit, themeJs, clipboardJs, leaderboardJs };
+  return { themeInit, themeJs, clipboardJs, leaderboardJs, liveScoreJs };
 }
diff --git a/src/build/06-homepage.mjs b/src/build/06-homepage.mjs
new file mode 100644
index 0000000..880a4e6
--- /dev/null
+++ b/src/build/06-homepage.mjs
@@ -0,0 +1,160 @@
+// Homepage emit. Section 6 of the build pipeline.
+//
+// Produces dist/index.html (hero + live-score form + principle listing) and
+// the trimmed-to-match dist/index.md twin. The live-scoring form is
+// server-rendered as an inert shell; /js/live-score.js wires submit +
+// Turnstile + redirect on the client side. The Turnstile sitekey is
+// injected by the Worker via meta[name=turnstile-sitekey] — only set on
+// staging until full promotion (DESIGN.md §3.4).
+
+import { readFile, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { extractDescription, extractFirstParagraph, extractIntroSummary, extractTitle } from './content.mjs';
+import { emitShell } from './shell.mjs';
+import { absolutifyMarkdownLinks, escHtml } from './util.mjs';
+
+/**
+ * Build the homepage body HTML — hero, live-scoring form section,
+ * principle listing, install-anc CTA. The live-score section sits between
+ * hero and principles per the wireframe-first placement; layout polish is
+ * deferred to /design-review after the basic surface renders.
+ *
+ * @param {string} introTitle
+ * @param {string} introLede
+ * @param {Array<{n: number, title: string, shortDesc: string}>} principles
+ * @returns {string}
+ */
+function buildHomepageBody(introTitle, introLede, principles) {
+  const entries = principles
+    .map((p) => {
+      const num = String(p.n).padStart(2, '0');
+      const title = escHtml(p.title.replace(/^P\d+:\s*/, ''));
+      const desc = escHtml(p.shortDesc);
+      return `    <li class="principle-entry">
+      <a href="/p${p.n}" class="principle-entry__link">
+        <span class="principle-entry__num">${num}</span>
+        <span class="principle-entry__title">${title}</span>
+        <span class="principle-entry__desc">${desc}</span>
+      </a>
+    </li>`;
+    })
+    .join('\n');
+
+  return `<section class="hero">
+  <h1 class="hero__title">${escHtml(introTitle)}</h1>
+  <p class="hero__lede">${escHtml(introLede)}</p>
+</section>
+${buildLiveScoreSection()}
+<section class="principles-index" aria-label="The eight principles">
+  <ol class="principles-index__list">
+${entries}
+  </ol>
+</section>`;
+}
+
+/**
+ * Live-scoring paste-input form section. Server-rendered shell: the JS at
+ * /js/live-score.js (lazy-loaded with the rest of the deferred client
+ * bundle) wires submit + Turnstile + theater. The Turnstile sitekey is
+ * injected by the Worker at request time via meta[name=turnstile-sitekey]
+ * — only set on staging until full promotion, so production HTML carries
+ * an empty value and the JS disables the form with a "not yet live"
+ * message.
+ *
+ * R9 CTA framing: install-anc is the PRIMARY surface, not buried. Visible
+ * above the form input so a visitor who never engages the form still sees
+ * the local-install option first.
+ *
+ * @returns {string}
+ */
+function buildLiveScoreSection() {
+  return `<section class="live-score" aria-labelledby="live-score-heading" data-live-score-section>
+  <div class="live-score__row">
+    <span class="live-score__kicker" aria-hidden="true">Try</span>
+    <div class="live-score__content">
+      <h2 id="live-score-heading" class="live-score__title">Score a binary, live.</h2>
+      <p class="live-score__lede">
+        <a href="/install">Install <code>anc</code> locally</a> for source + project depth. The demo here is binary and behavioral checks only.
+      </p>
+      <form class="live-score__form" method="post" action="/api/score" novalidate data-live-score-form>
+        <div class="live-score__input-row">
+          <input
+            id="live-score-input"
+            class="live-score__input"
+            name="input"
+            type="text"
+            autocomplete="off"
+            spellcheck="false"
+            placeholder="ripgrep"
+            required
+            aria-label="Tool name, install command, or GitHub URL"
+            aria-describedby="live-score-help"
+          />
+          <button type="submit" class="live-score__submit" data-live-score-submit>Score</button>
+        </div>
+        <p id="live-score-help" class="live-score__help">
+          or try
+          <button type="button" class="live-score__chip" data-live-score-example="ripgrep"><code>ripgrep</code></button>,
+          <button type="button" class="live-score__chip" data-live-score-example="brew install bat"><code>brew install bat</code></button>,
+          or
+          <button type="button" class="live-score__chip" data-live-score-example="https://github.com/cli/cli"><code>github.com/cli/cli</code></button>.
+        </p>
+        <p class="live-score__status" data-live-score-status role="status" aria-live="polite" hidden></p>
+      </form>
+    </div>
+  </div>
+</section>`;
+}
+
+/**
+ * Emit dist/index.html and dist/index.md. The introSource is returned so
+ * downstream (llms-full.txt) can embed the homepage markdown verbatim
+ * without re-reading the file.
+ *
+ * @param {object} args
+ * @param {string} args.distDir
+ * @param {string} args.contentDir
+ * @param {string} args.themeInit
+ * @param {Array<{n: number, title: string, shortDesc: string}>} args.principles
+ * @returns {Promise<{introTitle: string, introSummary: string, introSource: string, introLede: string}>}
+ */
+export async function emitHomepage({ distDir, contentDir, themeInit, principles }) {
+  const introPath = join(contentDir, '_intro.md');
+  const introSource = await readFile(introPath, 'utf8');
+  const introTitle = extractTitle(introSource);
+  const introSummary = extractIntroSummary(introSource);
+  const introDescription = extractDescription(introSource);
+  const introLede = extractFirstParagraph(introSource);
+
+  const indexBody = buildHomepageBody(introTitle, introLede, principles);
+  await writeFile(
+    join(distDir, 'index.html'),
+    emitShell({
+      title: introTitle,
+      description: introDescription,
+      canonicalPath: '/',
+      bodyHtml: indexBody,
+      themeInitJs: themeInit,
+      isIndex: true,
+      // Homepage carries the live-scoring form. /js/live-score.js is
+      // bundled in assets.mjs alongside theme/clipboard/leaderboard and
+      // loads with `defer`. Lazy-loads Turnstile + handles submit/redirect.
+      extraScripts: ['/js/live-score.js'],
+    }),
+  );
+
+  // index.md — trimmed to match the HTML homepage.
+  const indexMdLines = [
+    `# ${introTitle}`,
+    '',
+    introLede,
+    '',
+    '## Principles',
+    '',
+    ...principles.map((p) => `- [${p.title}](/p${p.n}) — ${p.shortDesc}`),
+    '',
+  ];
+  await writeFile(join(distDir, 'index.md'), absolutifyMarkdownLinks(indexMdLines.join('\n')));
+
+  return { introTitle, introSummary, introSource, introLede };
+}
diff --git a/src/build/07-subpages.mjs b/src/build/07-subpages.mjs
new file mode 100644
index 0000000..b05b10e
--- /dev/null
+++ b/src/build/07-subpages.mjs
@@ -0,0 +1,60 @@
+// Content-driven sub-pages emit. Section 7 of the build pipeline.
+//
+// For each entry in `subPages`, reads content/<name>.md, renders the HTML
+// via the shared markdown pipeline, wraps in emitShell, and emits both the
+// HTML and markdown twin. The twin is the authored source with site-
+// relative links absolutified.
+//
+// Adding a new content/*.md page requires three coordinated registrations:
+// this list, src/build/10-sitemap.mjs's hardcoded paths, and src/build/shell.mjs's
+// nav. See docs/solutions/conventions/new-content-page-requires-three-registrations-2026-05-21.md.
+
+import { readFile, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { extractDescription, extractTitle } from './content.mjs';
+import { renderMarkdown } from './render.mjs';
+import { emitShell } from './shell.mjs';
+import { absolutifyMarkdownLinks } from './util.mjs';
+
+/**
+ * Emit content-driven sub-pages (HTML + MD twin via shared pipeline).
+ *
+ * @param {object} args
+ * @param {string} args.distDir
+ * @param {string} args.contentDir
+ * @param {string} args.themeInit
+ * @returns {Promise<Array<{name: string, source: string, title: string}>>}
+ *          Per-page metadata consumed by llms-full.txt assembly.
+ */
+export async function emitSubPages({ distDir, contentDir, themeInit }) {
+  const subPages = [
+    { name: 'check', path: join(contentDir, 'check.md') },
+    { name: 'install', path: join(contentDir, 'install.md') },
+    { name: 'about', path: join(contentDir, 'about.md') },
+    { name: 'badge', path: join(contentDir, 'badge.md') },
+    { name: 'changelog', path: join(contentDir, 'changelog.md') },
+    { name: 'contribute', path: join(contentDir, 'contribute.md') },
+    { name: 'methodology', path: join(contentDir, 'methodology.md') },
+    { name: 'scorecard-schema', path: join(contentDir, 'scorecard-schema.md') },
+  ];
+  const subPageData = [];
+  for (const { name, path } of subPages) {
+    const source = await readFile(path, 'utf8');
+    const title = extractTitle(source);
+    const description = extractDescription(source);
+    const html = await renderMarkdown(source);
+    await writeFile(
+      join(distDir, `${name}.html`),
+      emitShell({
+        title,
+        description,
+        canonicalPath: `/${name}`,
+        bodyHtml: html,
+        themeInitJs: themeInit,
+      }),
+    );
+    await writeFile(join(distDir, `${name}.md`), absolutifyMarkdownLinks(source));
+    subPageData.push({ name, source, title });
+  }
+  return subPageData;
+}
diff --git a/src/build/08-scorecards-emit.mjs b/src/build/08-scorecards-emit.mjs
new file mode 100644
index 0000000..02653fd
--- /dev/null
+++ b/src/build/08-scorecards-emit.mjs
@@ -0,0 +1,292 @@
+// Scorecard-surface emit. Section 8 of the build pipeline.
+//
+// Owns the entire scorecard + coverage + skill emit pipeline:
+//   - Registry loading + corpus invariants
+//   - Build-time indexes for the live-scoring path (registry-index.json,
+//     discovery-hints-index.json)
+//   - Leaderboard page (dist/scorecards.html + .md)
+//   - Per-tool scorecard pages (dist/score/<name>.{html,md})
+//   - Badge SVGs (dist/badge/<name>.svg)
+//   - Binary-name redirect pages for tools where binary !== name
+//   - Stale-file reaping for removed registry entries
+//   - Coverage matrix page (dist/coverage.{html,md})
+//   - Skill manifest surfaces (dist/skill.json + dist/skill.{html,md})
+//
+// Returns the data downstream needs: leaderboard (for llms-full + sitemap
+// extra paths), scorecardPaths (for sitemap), coverageMarkdown and skill
+// artifacts (for llms-full).
+
+import { mkdir, readdir, unlink, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { renderBadgeSvg } from './badge.mjs';
+import { buildCoverageBody, buildCoverageMarkdown, loadCoverageMatrix } from './coverage.mjs';
+import { emitBuildIndexes } from './registry-index.mjs';
+import {
+  computeLeaderboard,
+  extractTopIssues,
+  loadRegistry,
+  loadScoredTools,
+  runScorecardInvariants,
+} from './scorecards.mjs';
+import {
+  buildLeaderboardBody,
+  buildLeaderboardMarkdown,
+  buildScorecardBody,
+  buildScorecardMarkdown,
+} from './scorecards-render.mjs';
+import { emitShell } from './shell.mjs';
+import { emitSkillJson, emitSkillMarkdown, loadSkillData, renderSkillPage } from './skill.mjs';
+import { absolutifyMarkdownLinks, escHtml } from './util.mjs';
+
+/**
+ * Emit the leaderboard, per-tool scorecards + badges, coverage page, and
+ * skill manifest surfaces. Returns the data downstream (sitemap, llms)
+ * needs.
+ *
+ * @param {object} args
+ * @param {string} args.distDir
+ * @param {string} args.registryPath
+ * @param {string} args.hintsPath
+ * @param {string} args.coverageMatrixPath
+ * @param {string} args.skillDataPath
+ * @param {string} args.scorecardsDir
+ * @param {string} args.themeInit
+ * @returns {Promise<{
+ *   leaderboard: Array<object>,
+ *   scorecardPaths: string[],
+ *   badgePaths: string[],
+ *   coverageMarkdown: string,
+ *   skillData: object,
+ *   skillMarkdown: string,
+ * }>}
+ */
+export async function emitScorecardSurface({
+  distDir,
+  registryPath,
+  hintsPath,
+  coverageMatrixPath,
+  skillDataPath,
+  scorecardsDir,
+  themeInit,
+}) {
+  const registry = await loadRegistry(registryPath);
+
+  // v0.4 corpus invariants run before rendering: any scorecard below the
+  // schema floor, missing a registry entry, scoring the wrong binary, or
+  // carrying a non-RFC-3339 timestamp aborts the build before producing
+  // bad output.
+  await runScorecardInvariants(scorecardsDir, registry);
+  // Scorecard-driven discovery + registry editorial join. Both directions
+  // of mismatch are warnings, not errors: a scorecard with no registry
+  // entry → excluded; a registry entry with no scorecard → excluded. The
+  // build emits a stable WARNINGS_JSON line so CI can parse it into a
+  // PR-comment annotation.
+  const { tools: toolsWithScorecards, warnings: scorecardWarnings } = await loadScoredTools(scorecardsDir, registry);
+  for (const filename of scorecardWarnings.scorecardOrphans) {
+    console.warn(`warning: scorecard ${filename} has no matching registry entry — excluded from leaderboard.`);
+  }
+  for (const name of scorecardWarnings.registryOrphans) {
+    console.warn(`warning: registry entry "${name}" has no matching scorecard — excluded from leaderboard.`);
+  }
+  console.log(`WARNINGS_JSON: ${JSON.stringify(scorecardWarnings)}`);
+
+  // 8a. Build-time indexes for the live-scoring path:
+  //     - dist/registry-index.json (powers /api/score registry-fast-path)
+  //     - dist/discovery-hints-index.json (powers discovery's hint
+  //       short-circuit)
+  //
+  // Each registry-index entry is augmented with the latest scorecard's
+  // version, the anc binary version that produced it, and the public URL
+  // of the per-tool scorecard page, so /api/score can return the
+  // spec_version + anc_version + checker_url triad without fetching the
+  // full scorecard payload.
+  const enrichments = {};
+  for (const t of toolsWithScorecards) {
+    enrichments[t.tool.name] = {
+      version: t.version,
+      anc_version: t.metadata?.anc?.version ?? null,
+      scorecard_url: `/score/${t.tool.name}`,
+      // Carried into the registry-fast-path envelope so the homepage
+      // form can show a "Curated · X% pass rate" reward inline without
+      // a second round-trip to fetch the scorecard JSON. Schema 0.5
+      // guarantees badge.score_pct is an integer 0..100.
+      score_pct: t.scorecard?.badge?.score_pct ?? null,
+    };
+  }
+  const { warnings: indexWarnings } = await emitBuildIndexes({
+    registry,
+    hintsPath,
+    distDir,
+    enrichments,
+  });
+  for (const w of indexWarnings) console.warn(`warning: ${w}`);
+  const leaderboard = computeLeaderboard(toolsWithScorecards);
+
+  const methodologyHtml = `  <p>Every score is the output of <code>anc check &lt;binary&gt;</code> against a real CLI tool.
+  The <strong>score</strong> column is the pass rate <code>pass / (pass + warn + fail)</code>;
+  the <strong>principles met</strong> column counts how many of the eight principles have every
+  check passing. The <strong>audience</strong> classification — when present — is informational,
+  not authoritative; the per-tool page's evidence list is the ground truth.</p>
+  <p>For the full explanation of scoring, audience classification, audit profiles, and how to
+  request a re-score, see the <a href="/methodology">methodology page</a>.</p>
+  <p>To reproduce any row locally, <a href="/install">install <code>anc</code></a> and run
+  <code>anc check &lt;binary&gt;</code>.</p>`;
+
+  const leaderboardBody = buildLeaderboardBody(leaderboard, methodologyHtml);
+  await writeFile(
+    join(distDir, 'scorecards.html'),
+    emitShell({
+      title: 'ANC 100 — Agent-Native CLI Leaderboard',
+      description:
+        'Automated agent-readiness scores for real CLI tools, scored against the seven agent-native principles.',
+      canonicalPath: '/scorecards',
+      bodyHtml: leaderboardBody,
+      themeInitJs: themeInit,
+      extraScripts: ['/js/leaderboard.js'],
+    }),
+  );
+  await writeFile(join(distDir, 'scorecards.md'), absolutifyMarkdownLinks(buildLeaderboardMarkdown(leaderboard)));
+
+  // Per-tool scorecard pages → dist/score/<tool-name>.html + .md
+  // Badge SVGs               → dist/badge/<tool-name>.svg
+  // Binary-name redirects    → dist/score/<binary>.html + .md (when
+  //                            registry.binary !== registry.name)
+  await mkdir(join(distDir, 'score'), { recursive: true });
+  await mkdir(join(distDir, 'badge'), { recursive: true });
+  // Drop stale per-tool pages and badge SVGs from prior builds. When a tool
+  // is removed from the registry (e.g., aider, plandex, fabric in PR #40),
+  // its old html/md/svg would otherwise linger in dist/ and ship as broken
+  // links / orphaned badges referencing a tool the leaderboard no longer
+  // knows about. The allowlist also includes binary slugs for the
+  // name-vs-binary tools (ripgrep/rg, ast-grep/sg, …) so the redirect
+  // pages emitted by the per-tool loop aren't unlinked on every build
+  // — without this guard the reaper deletes them every time, defeating
+  // the redirect entirely.
+  const expectedNames = new Set(leaderboard.map((e) => e.tool.name));
+  for (const e of leaderboard) {
+    if (e.tool.binary && e.tool.binary !== e.tool.name) {
+      expectedNames.add(e.tool.binary);
+    }
+  }
+  for (const file of await readdir(join(distDir, 'score')).catch(() => [])) {
+    const m = file.match(/^([a-z0-9-]+)\.(html|md)$/);
+    if (m && !expectedNames.has(m[1])) {
+      await unlink(join(distDir, 'score', file));
+    }
+  }
+  // Badge SVGs are emitted for the canonical name only (no binary-slug
+  // SVG). A reader following /score/rg → /score/ripgrep ends up on the
+  // canonical page, where /badge/ripgrep.svg renders correctly.
+  const expectedBadgeNames = new Set(leaderboard.map((e) => e.tool.name));
+  for (const file of await readdir(join(distDir, 'badge')).catch(() => [])) {
+    const m = file.match(/^([a-z0-9-]+)\.svg$/);
+    if (m && !expectedBadgeNames.has(m[1])) {
+      await unlink(join(distDir, 'badge', file));
+    }
+  }
+  const scorecardPaths = [];
+  const badgePaths = [];
+  for (const entry of leaderboard) {
+    const { tool, scorecard, principleScore, version, metadata } = entry;
+    const topIssues = extractTopIssues(scorecard);
+
+    const scorecardBody = buildScorecardBody(tool, scorecard, topIssues, principleScore, version, metadata);
+    await writeFile(
+      join(distDir, 'score', `${tool.name}.html`),
+      emitShell({
+        title: `${tool.name} — Agent-Native Scorecard`,
+        description: `Agent-readiness scorecard for ${tool.name}: ${tool.description}`,
+        canonicalPath: `/score/${tool.name}`,
+        bodyHtml: scorecardBody,
+        themeInitJs: themeInit,
+      }),
+    );
+    await writeFile(
+      join(distDir, 'score', `${tool.name}.md`),
+      absolutifyMarkdownLinks(buildScorecardMarkdown(tool, scorecard, topIssues, principleScore, version, metadata)),
+    );
+    scorecardPaths.push(`/score/${tool.name}`);
+
+    // Badge SVG — emitted for every scored tool, even those below the
+    // eligibility floor. The /score/<tool> page gates the embed snippet
+    // (above-floor only); the SVG itself stays available so a tool's
+    // existing embed continues to render the current score after a
+    // regression. Score derived from schema 0.5 `badge.score_pct` (0–100
+    // int) → 0–1 for badge-maker's color thresholds.
+    // spec_version is per-scorecard (the spec the CLI was compiled against
+    // when it produced this scorecard) — pass it explicitly so the badge
+    // label tracks the actual scoring context, not a global default.
+    const svg = renderBadgeSvg(scorecard.badge.score_pct / 100, scorecard.spec_version);
+    await writeFile(join(distDir, 'badge', `${tool.name}.svg`), svg);
+    badgePaths.push(`/badge/${tool.name}.svg`);
+
+    // Binary-name redirect: tools where registry.binary !== registry.name
+    // (e.g., ripgrep/rg, ast-grep/sg, bottom/btm — 11 entries today) get a
+    // second pair of files at /score/<binary>.html + .md that point at the
+    // canonical /score/<name>. Closes the URL fragmentation a reader hits
+    // when guessing the URL from the binary they typed at a shell prompt.
+    if (tool.binary && tool.binary !== tool.name) {
+      const targetPath = `/score/${tool.name}`;
+      const titleSafe = escHtml(tool.name);
+      const redirectHtml = `<!doctype html>
+<html lang="en">
+<head>
+  <meta charset="utf-8">
+  <title>Redirecting to ${titleSafe}</title>
+  <link rel="canonical" href="${targetPath}">
+  <meta http-equiv="refresh" content="0; url=${targetPath}">
+</head>
+<body>
+  <p>Redirecting to <a href="${targetPath}">${titleSafe}</a>. If your browser does not redirect, follow the link.</p>
+</body>
+</html>
+`;
+      await writeFile(join(distDir, 'score', `${tool.binary}.html`), redirectHtml);
+      await writeFile(join(distDir, 'score', `${tool.binary}.md`), `See [${targetPath}](${targetPath}).\n`);
+    }
+  }
+
+  // 8b. Coverage matrix page — /coverage.
+  const coverageMatrix = await loadCoverageMatrix(coverageMatrixPath);
+  const coverageBody = buildCoverageBody(coverageMatrix);
+  const coverageMarkdown = buildCoverageMarkdown(coverageMatrix);
+  await writeFile(
+    join(distDir, 'coverage.html'),
+    emitShell({
+      title: 'Spec Coverage Matrix — anc.dev',
+      description: 'Which agent-native CLI requirements have automated checks and which remain uncovered.',
+      canonicalPath: '/coverage',
+      bodyHtml: coverageBody,
+      themeInitJs: themeInit,
+    }),
+  );
+  await writeFile(join(distDir, 'coverage.md'), absolutifyMarkdownLinks(coverageMarkdown));
+
+  // 8c. /skill.json + /skill + /skill.md — skill-distribution surface.
+  // The same manifest is emitted as canonical JSON, rendered HTML (via the
+  // shared unified pipeline), and a markdown twin. Drift is structurally
+  // impossible because all three derive from the same data file.
+  const skillData = await loadSkillData(skillDataPath);
+  await emitSkillJson(skillData, distDir);
+  const { markdown: skillMarkdown, html: skillBodyHtml } = await renderSkillPage(skillData);
+  await writeFile(
+    join(distDir, 'skill.html'),
+    emitShell({
+      title: `Install ${skillData.name}`,
+      description: skillData.description,
+      canonicalPath: '/skill',
+      bodyHtml: skillBodyHtml,
+      themeInitJs: themeInit,
+    }),
+  );
+  await emitSkillMarkdown(absolutifyMarkdownLinks(skillMarkdown), distDir);
+
+  return {
+    leaderboard,
+    scorecardPaths,
+    badgePaths,
+    coverageMarkdown,
+    skillData,
+    skillMarkdown,
+  };
+}
diff --git a/src/build/09-llms-emit.mjs b/src/build/09-llms-emit.mjs
new file mode 100644
index 0000000..52d01b6
--- /dev/null
+++ b/src/build/09-llms-emit.mjs
@@ -0,0 +1,102 @@
+// llms.txt + llms-full.txt emit. Section 9 of the build pipeline.
+//
+// llms.txt is the structured index per https://llmstxt.org/ — H1 title, a
+// `>` summary line, then sections listing every page as a markdown link.
+// llms-full.txt embeds each page's markdown body verbatim with the .md-twin
+// absolutification policy so site-relative links resolve when an agent
+// fetches /llms-full.txt directly.
+
+import { writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { buildLlmsFull, buildLlmsIndex } from './llms.mjs';
+import { buildLeaderboardMarkdown } from './scorecards-render.mjs';
+import { absolutifyMarkdownLinks } from './util.mjs';
+
+/**
+ * Emit dist/llms.txt and dist/llms-full.txt.
+ *
+ * @param {object} args
+ * @param {string} args.distDir
+ * @param {string} args.introTitle
+ * @param {string} args.introSummary
+ * @param {string} args.introSource
+ * @param {Array<{n: number, slug: string, title: string, source: string}>} args.principles
+ * @param {Array<{name: string, source: string, title: string}>} args.subPageData
+ * @param {Array<object>} args.leaderboard         — per-tool entries; .tool.name is the canonical slug
+ * @param {string} args.coverageMarkdown            — pre-built coverage page body
+ * @param {object} args.skillData                   — manifest object; .name embedded in the section heading
+ * @param {string} args.skillMarkdown               — pre-built skill page body
+ */
+export async function emitLlmsSurface({
+  distDir,
+  introTitle,
+  introSummary,
+  introSource,
+  principles,
+  subPageData,
+  leaderboard,
+  coverageMarkdown,
+  skillData,
+  skillMarkdown,
+}) {
+  const llmsIndex = buildLlmsIndex({
+    introTitle,
+    summary: introSummary,
+    principles: principles.map((p) => ({ n: p.n, slug: p.slug, title: p.title })),
+    subPages: subPageData.map((s) => ({ name: s.name, title: s.title })),
+    scorecardLinks: [
+      { name: 'Leaderboard', path: '/scorecards.md' },
+      { name: 'Coverage Matrix', path: '/coverage.md' },
+      // Per-tool scorecards alphabetical so the llms.txt index reads as a
+      // browseable directory; the leaderboard itself owns rank-order presentation.
+      ...leaderboard
+        .map((e) => ({ name: e.tool.name, path: `/score/${e.tool.name}.md` }))
+        .sort((a, b) => a.name.localeCompare(b.name)),
+    ],
+    skillLinks: [
+      { name: 'Skill (HTML)', path: '/skill.md' },
+      { name: 'Skill (canonical JSON)', path: '/skill.json' },
+    ],
+  });
+  await writeFile(join(distDir, 'llms.txt'), llmsIndex);
+
+  // llms-full.txt embeds each page's markdown body verbatim. Apply the same
+  // .md-twin absolutification policy so site-relative links resolve when an
+  // agent fetches /llms-full.txt directly.
+  const llmsFull = buildLlmsFull({
+    sections: [
+      { title: introTitle, body: absolutifyMarkdownLinks(introSource), htmlPath: '/', mdPath: '/index.md' },
+      ...principles.map((p) => ({
+        title: p.title,
+        body: absolutifyMarkdownLinks(p.source),
+        htmlPath: `/p${p.n}`,
+        mdPath: `/p${p.n}.md`,
+      })),
+      ...subPageData.map((s) => ({
+        title: s.title,
+        body: absolutifyMarkdownLinks(s.source),
+        htmlPath: `/${s.name}`,
+        mdPath: `/${s.name}.md`,
+      })),
+      {
+        title: 'ANC 100 — Agent-Native CLI Leaderboard',
+        body: absolutifyMarkdownLinks(buildLeaderboardMarkdown(leaderboard)),
+        htmlPath: '/scorecards',
+        mdPath: '/scorecards.md',
+      },
+      {
+        title: 'Spec Coverage Matrix',
+        body: absolutifyMarkdownLinks(coverageMarkdown),
+        htmlPath: '/coverage',
+        mdPath: '/coverage.md',
+      },
+      {
+        title: `Install ${skillData.name}`,
+        body: absolutifyMarkdownLinks(skillMarkdown),
+        htmlPath: '/skill',
+        mdPath: '/skill.md',
+      },
+    ],
+  });
+  await writeFile(join(distDir, 'llms-full.txt'), llmsFull);
+}
diff --git a/src/build/sitemap.mjs b/src/build/10-sitemap.mjs
similarity index 96%
rename from src/build/sitemap.mjs
rename to src/build/10-sitemap.mjs
index 00cbb20..e233378 100644
--- a/src/build/sitemap.mjs
+++ b/src/build/10-sitemap.mjs
@@ -23,7 +23,9 @@ export function buildSitemap({ principleNumbers, extraPaths = [], baseUrl, lastm
     '/check',
     '/about',
     '/changelog',
+    '/contribute',
     '/methodology',
+    '/scorecard-schema',
     ...extraPaths,
   ];
 
diff --git a/src/build/build.mjs b/src/build/build.mjs
index 95885c1..32d9279 100644
--- a/src/build/build.mjs
+++ b/src/build/build.mjs
@@ -12,7 +12,7 @@
 //   7. Render check.md + about.md into sub-pages.
 //   8. Scorecard pages — leaderboard + per-tool pages from registry.yaml
 //      + scorecards/*.json.
-//   9. Emit llms.txt + llms-full.txt (A5 format).
+//   9. Emit llms.txt + llms-full.txt.
 //  10. Emit sitemap.xml.
 //  11. Invariant check — no MUST/SHOULD/MAY leaked into <code> / <pre> /
 //      <a>, locked anchors present on principle pages, md sha256 matches.
@@ -20,39 +20,25 @@
 // Fail-fast: the invariant check throws on violation so CI/`bun run build`
 // exits non-zero. Regression tests are the verification net.
 
-import { mkdir, readdir, readFile, unlink, writeFile } from 'node:fs/promises';
+import { mkdir, readFile, writeFile } from 'node:fs/promises';
 import { join } from 'node:path';
 import { fileURLToPath } from 'node:url';
-import { copyAssets } from './assets.mjs';
-import { renderBadgeSvg } from './badge.mjs';
-import {
-  extractDefinitionParagraph,
-  extractDescription,
-  extractFirstParagraph,
-  extractIntroSummary,
-  extractTitle,
-} from './content.mjs';
-import { buildCoverageBody, buildCoverageMarkdown, loadCoverageMatrix } from './coverage.mjs';
-import { buildLlmsFull, buildLlmsIndex } from './llms.mjs';
-import { emitBuildIndexes } from './registry-index.mjs';
+// Pipeline-stage modules sort in execution order via numeric filename
+// prefixes (00-… → 06-…). Numbering is decorative; build() below is the
+// actual order-enforcer. Shared helpers (content.mjs, render.mjs,
+// shell.mjs, util.mjs, etc.) stay unnumbered because they don't represent
+// a single pipeline stage.
+import { generateSpecVersionModule } from './00-spec-version-gen.mjs';
+import { copyAssets } from './01-assets.mjs';
+import { emitHomepage } from './06-homepage.mjs';
+import { emitSubPages } from './07-subpages.mjs';
+import { emitScorecardSurface } from './08-scorecards-emit.mjs';
+import { emitLlmsSurface } from './09-llms-emit.mjs';
+import { buildSitemap } from './10-sitemap.mjs';
+import { extractDefinitionParagraph, extractDescription, extractTitle } from './content.mjs';
 import { renderMarkdown } from './render.mjs';
-import {
-  computeLeaderboard,
-  extractTopIssues,
-  loadRegistry,
-  loadScoredTools,
-  runScorecardInvariants,
-} from './scorecards.mjs';
-import {
-  buildLeaderboardBody,
-  buildLeaderboardMarkdown,
-  buildScorecardBody,
-  buildScorecardMarkdown,
-} from './scorecards-render.mjs';
-import { emitShell } from './shell.mjs';
-import { buildSitemap } from './sitemap.mjs';
-import { emitSkillJson, emitSkillMarkdown, loadSkillData, renderSkillPage } from './skill.mjs';
-import { absolutifyMarkdownLinks, escHtml, parseFilename, sortedGlob } from './util.mjs';
+import { emitShell, emitShellTemplate } from './shell.mjs';
+import { absolutifyMarkdownLinks, parseFilename, sortedGlob } from './util.mjs';
 
 const REPO_ROOT = join(fileURLToPath(import.meta.url), '..', '..', '..');
 const CONTENT_DIR = join(REPO_ROOT, 'content');
@@ -79,37 +65,6 @@ async function ensureDir(dir) {
   await mkdir(dir, { recursive: true });
 }
 
-/**
- * Build the homepage body HTML — hero section (title + lede) followed by
- * the principle listing with links to individual pages.
- */
-function buildHomepageBody(introTitle, introLede, principles) {
-  const entries = principles
-    .map((p) => {
-      const num = String(p.n).padStart(2, '0');
-      const title = escHtml(p.title.replace(/^P\d+:\s*/, ''));
-      const desc = escHtml(p.shortDesc);
-      return `    <li class="principle-entry">
-      <a href="/p${p.n}" class="principle-entry__link">
-        <span class="principle-entry__num">${num}</span>
-        <span class="principle-entry__title">${title}</span>
-        <span class="principle-entry__desc">${desc}</span>
-      </a>
-    </li>`;
-    })
-    .join('\n');
-
-  return `<section class="hero">
-  <h1 class="hero__title">${escHtml(introTitle)}</h1>
-  <p class="hero__lede">${escHtml(introLede)}</p>
-</section>
-<section class="principles-index" aria-label="The eight principles">
-  <ol class="principles-index__list">
-${entries}
-  </ol>
-</section>`;
-}
-
 async function runInvariantChecks(distDir, principleSlugs, principleSources) {
   // 1. No MUST / SHOULD / MAY bare words inside <code> / <pre> / <a>.
   //    Check every principle page (the index page no longer has inline
@@ -157,12 +112,37 @@ async function runInvariantChecks(distDir, principleSlugs, principleSources) {
       throw new Error(`invariant: dist/p${n}.md does not match absolutified ${sourcePath}`);
     }
   }
+
+  // 5. Markdown-twin silence for the homepage. The homepage HTML
+  // gains the live-scoring form; the markdown twin MUST NOT carry any of
+  // that surface (no form markup, no JS reference, no Turnstile mention,
+  // no /api/score documentation). Agents pasting `Accept: text/markdown`
+  // against `/` are expected to use `anc check` locally; the form is
+  // HTML-only by design. A future copy edit that leaks any of these
+  // tokens into the homepage markdown fails the build here.
+  const indexMd = await readFile(join(distDir, 'index.md'), 'utf8');
+  const FORBIDDEN_IN_INDEX_MD = ['live-score', 'turnstile', 'challenges.cloudflare.com', '/api/score'];
+  for (const needle of FORBIDDEN_IN_INDEX_MD) {
+    if (indexMd.toLowerCase().includes(needle.toLowerCase())) {
+      throw new Error(
+        `invariant: dist/index.md leaked live-scoring surface "${needle}". The homepage markdown twin stays silent on the form by design.`,
+      );
+    }
+  }
 }
 
 export async function build() {
   await ensureDir(DIST_DIR);
 
+  // 0. Regenerate src/worker/spec-version.gen.ts from VERSION files BEFORE
+  // copyAssets bundles the client/worker JS. The Worker imports the file via
+  // a relative module path, so an out-of-date constant would otherwise ship
+  // verbatim into the bundle even when the VERSION files have advanced. The
+  // drift test (tests/spec-version-gen.test.ts) is the second guardrail.
+  await generateSpecVersionModule();
+
   // 1. Copy static assets + bundle client JS. themeInit inlined into every shell.
+  // bundleClient also emits /js/live-score.js used by the homepage form.
   const { themeInit } = await copyAssets({ repoRoot: REPO_ROOT, distDir: DIST_DIR });
 
   // 2. Sorted principle files.
@@ -202,321 +182,55 @@ export async function build() {
   }
 
   // 6. Homepage — hero + principle listing (links to /p{N} pages).
-  const introPath = join(CONTENT_DIR, '_intro.md');
-  const introSource = await readFile(introPath, 'utf8');
-  const introTitle = extractTitle(introSource);
-  const introSummary = extractIntroSummary(introSource);
-  const introDescription = extractDescription(introSource);
-  const introLede = extractFirstParagraph(introSource);
-
-  const indexBody = buildHomepageBody(introTitle, introLede, principles);
-  await writeFile(
-    join(DIST_DIR, 'index.html'),
-    emitShell({
-      title: introTitle,
-      description: introDescription,
-      canonicalPath: '/',
-      bodyHtml: indexBody,
-      themeInitJs: themeInit,
-      isIndex: true,
-    }),
-  );
-
-  // index.md — trimmed to match the HTML homepage.
-  const indexMdLines = [
-    `# ${introTitle}`,
-    '',
-    introLede,
-    '',
-    '## Principles',
-    '',
-    ...principles.map((p) => `- [${p.title}](/p${p.n}) — ${p.shortDesc}`),
-    '',
-  ];
-  await writeFile(join(DIST_DIR, 'index.md'), absolutifyMarkdownLinks(indexMdLines.join('\n')));
+  const { introTitle, introSummary, introSource } = await emitHomepage({
+    distDir: DIST_DIR,
+    contentDir: CONTENT_DIR,
+    themeInit,
+    principles,
+  });
 
   // 7. content-driven sub-pages (HTML + MD twin via shared pipeline).
-  const subPages = [
-    { name: 'check', path: join(CONTENT_DIR, 'check.md') },
-    { name: 'install', path: join(CONTENT_DIR, 'install.md') },
-    { name: 'about', path: join(CONTENT_DIR, 'about.md') },
-    { name: 'badge', path: join(CONTENT_DIR, 'badge.md') },
-    { name: 'changelog', path: join(CONTENT_DIR, 'changelog.md') },
-    { name: 'methodology', path: join(CONTENT_DIR, 'methodology.md') },
-    { name: 'scorecard-schema', path: join(CONTENT_DIR, 'scorecard-schema.md') },
-  ];
-  const subPageData = [];
-  for (const { name, path } of subPages) {
-    const source = await readFile(path, 'utf8');
-    const title = extractTitle(source);
-    const description = extractDescription(source);
-    const html = await renderMarkdown(source);
-    await writeFile(
-      join(DIST_DIR, `${name}.html`),
-      emitShell({
-        title,
-        description,
-        canonicalPath: `/${name}`,
-        bodyHtml: html,
-        themeInitJs: themeInit,
-      }),
-    );
-    await writeFile(join(DIST_DIR, `${name}.md`), absolutifyMarkdownLinks(source));
-    subPageData.push({ name, source, title });
-  }
-
-  // 8. Scorecard pages — leaderboard + per-tool pages.
-  const registry = await loadRegistry(REGISTRY_PATH);
-
-  // 8a. Build-time indexes for the live-scoring path (plan U1):
-  //     - dist/registry-index.json (powers U4's registry-fast-path)
-  //     - dist/discovery-hints-index.json (powers U4's step 0.5 — F1)
-  const { warnings: indexWarnings } = await emitBuildIndexes({
-    registry,
-    hintsPath: HINTS_PATH,
+  const subPageData = await emitSubPages({
     distDir: DIST_DIR,
+    contentDir: CONTENT_DIR,
+    themeInit,
   });
-  for (const w of indexWarnings) console.warn(`warning: ${w}`);
 
-  // v0.4 corpus invariants run before rendering: any scorecard below the
-  // schema floor, missing a registry entry, scoring the wrong binary, or
-  // carrying a non-RFC-3339 timestamp aborts the build before producing
-  // bad output.
-  await runScorecardInvariants(SCORECARDS_DIR, registry);
-  // Scorecard-driven discovery + registry editorial join (U3 inversion).
-  // Both directions of mismatch are warnings, not errors: a scorecard with
-  // no registry entry → excluded; a registry entry with no scorecard →
-  // excluded. The build emits a stable WARNINGS_JSON line so CI can parse
-  // it (U8 PR-comment annotation).
-  const { tools: toolsWithScorecards, warnings: scorecardWarnings } = await loadScoredTools(SCORECARDS_DIR, registry);
-  for (const filename of scorecardWarnings.scorecardOrphans) {
-    console.warn(`warning: scorecard ${filename} has no matching registry entry — excluded from leaderboard.`);
-  }
-  for (const name of scorecardWarnings.registryOrphans) {
-    console.warn(`warning: registry entry "${name}" has no matching scorecard — excluded from leaderboard.`);
-  }
-  console.log(`WARNINGS_JSON: ${JSON.stringify(scorecardWarnings)}`);
-  const leaderboard = computeLeaderboard(toolsWithScorecards);
-
-  const methodologyHtml = `  <p>Every score is the output of <code>anc check &lt;binary&gt;</code> against a real CLI tool.
-  The <strong>score</strong> column is the pass rate <code>pass / (pass + warn + fail)</code>;
-  the <strong>principles met</strong> column counts how many of the eight principles have every
-  check passing. The <strong>audience</strong> classification — when present — is informational,
-  not authoritative; the per-tool page's evidence list is the ground truth.</p>
-  <p>For the full explanation of scoring, audience classification, audit profiles, and how to
-  request a re-score, see the <a href="/methodology">methodology page</a>.</p>
-  <p>To reproduce any row locally, <a href="/install">install <code>anc</code></a> and run
-  <code>anc check &lt;binary&gt;</code>.</p>`;
-
-  const leaderboardBody = buildLeaderboardBody(leaderboard, methodologyHtml);
-  await writeFile(
-    join(DIST_DIR, 'scorecards.html'),
-    emitShell({
-      title: 'ANC 100 — Agent-Native CLI Leaderboard',
-      description:
-        'Automated agent-readiness scores for real CLI tools, scored against the seven agent-native principles.',
-      canonicalPath: '/scorecards',
-      bodyHtml: leaderboardBody,
-      themeInitJs: themeInit,
-      extraScripts: ['/js/leaderboard.js'],
-    }),
-  );
-  await writeFile(join(DIST_DIR, 'scorecards.md'), absolutifyMarkdownLinks(buildLeaderboardMarkdown(leaderboard)));
-
-  // Per-tool scorecard pages → dist/score/<tool-name>.html + .md
-  // Badge SVGs               → dist/badge/<tool-name>.svg
-  // Binary-name redirects    → dist/score/<binary>.html + .md (when
-  //                            registry.binary !== registry.name; U7)
-  await ensureDir(join(DIST_DIR, 'score'));
-  await ensureDir(join(DIST_DIR, 'badge'));
-  // Drop stale per-tool pages and badge SVGs from prior builds. When a tool
-  // is removed from the registry (e.g., aider, plandex, fabric in PR #40),
-  // its old html/md/svg would otherwise linger in dist/ and ship as broken
-  // links / orphaned badges referencing a tool the leaderboard no longer
-  // knows about. The allowlist also includes binary slugs for the
-  // name-vs-binary tools (ripgrep/rg, ast-grep/sg, …) so the redirect pages
-  // U7 emits aren't unlinked on every build (P0: without this the reaper
-  // deletes them every time, defeating the redirect entirely).
-  const expectedNames = new Set(leaderboard.map((e) => e.tool.name));
-  for (const e of leaderboard) {
-    if (e.tool.binary && e.tool.binary !== e.tool.name) {
-      expectedNames.add(e.tool.binary);
-    }
-  }
-  for (const file of await readdir(join(DIST_DIR, 'score')).catch(() => [])) {
-    const m = file.match(/^([a-z0-9-]+)\.(html|md)$/);
-    if (m && !expectedNames.has(m[1])) {
-      await unlink(join(DIST_DIR, 'score', file));
-    }
-  }
-  // Badge SVGs are emitted for the canonical name only (no binary-slug
-  // SVG). A reader following /score/rg → /score/ripgrep ends up on the
-  // canonical page, where /badge/ripgrep.svg renders correctly.
-  const expectedBadgeNames = new Set(leaderboard.map((e) => e.tool.name));
-  for (const file of await readdir(join(DIST_DIR, 'badge')).catch(() => [])) {
-    const m = file.match(/^([a-z0-9-]+)\.svg$/);
-    if (m && !expectedBadgeNames.has(m[1])) {
-      await unlink(join(DIST_DIR, 'badge', file));
-    }
-  }
-  const scorecardPaths = [];
-  const badgePaths = [];
-  for (const entry of leaderboard) {
-    const { tool, scorecard, principleScore, version, metadata } = entry;
-    const topIssues = extractTopIssues(scorecard);
-
-    const scorecardBody = buildScorecardBody(tool, scorecard, topIssues, principleScore, version, metadata);
-    await writeFile(
-      join(DIST_DIR, 'score', `${tool.name}.html`),
-      emitShell({
-        title: `${tool.name} — Agent-Native Scorecard`,
-        description: `Agent-readiness scorecard for ${tool.name}: ${tool.description}`,
-        canonicalPath: `/score/${tool.name}`,
-        bodyHtml: scorecardBody,
-        themeInitJs: themeInit,
-      }),
-    );
-    await writeFile(
-      join(DIST_DIR, 'score', `${tool.name}.md`),
-      absolutifyMarkdownLinks(buildScorecardMarkdown(tool, scorecard, topIssues, principleScore, version, metadata)),
-    );
-    scorecardPaths.push(`/score/${tool.name}`);
-
-    // Badge SVG — emitted for every scored tool, even those below the
-    // eligibility floor. The /score/<tool> page gates the embed snippet
-    // (above-floor only); the SVG itself stays available so a tool's
-    // existing embed continues to render the current score after a
-    // regression. Score derived from schema 0.5 `badge.score_pct` (0–100
-    // int) → 0–1 for badge-maker's color thresholds.
-    // spec_version is per-scorecard (the spec the CLI was compiled against
-    // when it produced this scorecard) — pass it explicitly so the badge
-    // label tracks the actual scoring context, not a global default.
-    const svg = renderBadgeSvg(scorecard.badge.score_pct / 100, scorecard.spec_version);
-    await writeFile(join(DIST_DIR, 'badge', `${tool.name}.svg`), svg);
-    badgePaths.push(`/badge/${tool.name}.svg`);
-
-    // Binary-name redirect: tools where registry.binary !== registry.name
-    // (e.g., ripgrep/rg, ast-grep/sg, bottom/btm — 11 entries today) get a
-    // second pair of files at /score/<binary>.html + .md that point at the
-    // canonical /score/<name>. Closes the URL fragmentation a reader hits
-    // when guessing the URL from the binary they typed at a shell prompt.
-    if (tool.binary && tool.binary !== tool.name) {
-      const targetPath = `/score/${tool.name}`;
-      const titleSafe = escHtml(tool.name);
-      const redirectHtml = `<!doctype html>
-<html lang="en">
-<head>
-  <meta charset="utf-8">
-  <title>Redirecting to ${titleSafe}</title>
-  <link rel="canonical" href="${targetPath}">
-  <meta http-equiv="refresh" content="0; url=${targetPath}">
-</head>
-<body>
-  <p>Redirecting to <a href="${targetPath}">${titleSafe}</a>. If your browser does not redirect, follow the link.</p>
-</body>
-</html>
-`;
-      await writeFile(join(DIST_DIR, 'score', `${tool.binary}.html`), redirectHtml);
-      await writeFile(join(DIST_DIR, 'score', `${tool.binary}.md`), `See [${targetPath}](${targetPath}).\n`);
-    }
-  }
-
-  // 8b. Coverage matrix page — /coverage.
-  const coverageMatrix = await loadCoverageMatrix(COVERAGE_MATRIX_PATH);
-  const coverageBody = buildCoverageBody(coverageMatrix);
-  const coverageMarkdown = buildCoverageMarkdown(coverageMatrix);
-  await writeFile(
-    join(DIST_DIR, 'coverage.html'),
-    emitShell({
-      title: 'Spec Coverage Matrix — anc.dev',
-      description: 'Which agent-native CLI requirements have automated checks and which remain uncovered.',
-      canonicalPath: '/coverage',
-      bodyHtml: coverageBody,
-      themeInitJs: themeInit,
-    }),
-  );
-  await writeFile(join(DIST_DIR, 'coverage.md'), absolutifyMarkdownLinks(coverageMarkdown));
-
-  // 8c. /skill.json + /skill + /skill.md — skill-distribution surface.
-  // The same manifest is emitted as canonical JSON, rendered HTML (via the
-  // shared unified pipeline), and a markdown twin. Drift is structurally
-  // impossible because all three derive from the same data file.
-  const skillData = await loadSkillData(SKILL_DATA_PATH);
-  await emitSkillJson(skillData, DIST_DIR);
-  const { markdown: skillMarkdown, html: skillBodyHtml } = await renderSkillPage(skillData);
-  await writeFile(
-    join(DIST_DIR, 'skill.html'),
-    emitShell({
-      title: `Install ${skillData.name}`,
-      description: skillData.description,
-      canonicalPath: '/skill',
-      bodyHtml: skillBodyHtml,
-      themeInitJs: themeInit,
-    }),
-  );
-  await emitSkillMarkdown(absolutifyMarkdownLinks(skillMarkdown), DIST_DIR);
+  // 8. Scorecard surface — leaderboard, per-tool pages, badges, coverage, skill.
+  const { leaderboard, scorecardPaths, badgePaths, coverageMarkdown, skillData, skillMarkdown } =
+    await emitScorecardSurface({
+      distDir: DIST_DIR,
+      registryPath: REGISTRY_PATH,
+      hintsPath: HINTS_PATH,
+      coverageMatrixPath: COVERAGE_MATRIX_PATH,
+      skillDataPath: SKILL_DATA_PATH,
+      scorecardsDir: SCORECARDS_DIR,
+      themeInit,
+    });
 
   // 9. llms.txt + llms-full.txt (includes scorecard + skill sections).
-  const llmsIndex = buildLlmsIndex({
+  await emitLlmsSurface({
+    distDir: DIST_DIR,
     introTitle,
-    summary: introSummary,
-    principles: principles.map((p) => ({ n: p.n, slug: p.slug, title: p.title })),
-    subPages: subPageData.map((s) => ({ name: s.name, title: s.title })),
-    scorecardLinks: [
-      { name: 'Leaderboard', path: '/scorecards.md' },
-      { name: 'Coverage Matrix', path: '/coverage.md' },
-      // Per-tool scorecards alphabetical so the llms.txt index reads as a
-      // browseable directory; the leaderboard itself owns rank-order presentation.
-      ...leaderboard
-        .map((e) => ({ name: e.tool.name, path: `/score/${e.tool.name}.md` }))
-        .sort((a, b) => a.name.localeCompare(b.name)),
-    ],
-    skillLinks: [
-      { name: 'Skill (HTML)', path: '/skill.md' },
-      { name: 'Skill (canonical JSON)', path: '/skill.json' },
-    ],
+    introSummary,
+    introSource,
+    principles,
+    subPageData,
+    leaderboard,
+    coverageMarkdown,
+    skillData,
+    skillMarkdown,
   });
-  await writeFile(join(DIST_DIR, 'llms.txt'), llmsIndex);
 
-  // llms-full.txt embeds each page's markdown body verbatim. Apply the same
-  // .md-twin absolutification policy so site-relative links resolve when an
-  // agent fetches /llms-full.txt directly.
-  const llmsFull = buildLlmsFull({
-    sections: [
-      { title: introTitle, body: absolutifyMarkdownLinks(introSource), htmlPath: '/', mdPath: '/index.md' },
-      ...principles.map((p) => ({
-        title: p.title,
-        body: absolutifyMarkdownLinks(p.source),
-        htmlPath: `/p${p.n}`,
-        mdPath: `/p${p.n}.md`,
-      })),
-      ...subPageData.map((s) => ({
-        title: s.title,
-        body: absolutifyMarkdownLinks(s.source),
-        htmlPath: `/${s.name}`,
-        mdPath: `/${s.name}.md`,
-      })),
-      {
-        title: 'ANC 100 — Agent-Native CLI Leaderboard',
-        body: absolutifyMarkdownLinks(buildLeaderboardMarkdown(leaderboard)),
-        htmlPath: '/scorecards',
-        mdPath: '/scorecards.md',
-      },
-      {
-        title: 'Spec Coverage Matrix',
-        body: absolutifyMarkdownLinks(coverageMarkdown),
-        htmlPath: '/coverage',
-        mdPath: '/coverage.md',
-      },
-      {
-        title: `Install ${skillData.name}`,
-        body: absolutifyMarkdownLinks(skillMarkdown),
-        htmlPath: '/skill',
-        mdPath: '/skill.md',
-      },
-    ],
-  });
-  await writeFile(join(DIST_DIR, 'llms-full.txt'), llmsFull);
+  // 9b. Live-score shell template. Worker's summary-render.ts fetches
+  // this asset to wrap dynamic `/score/live/<binary>` responses in the
+  // same shell as static pages. The `/_internal/*` namespace is
+  // intercepted by the Worker entry so direct user access returns 404 —
+  // the file exists for internal env.ASSETS fetches only. Filename
+  // mirrors the URL path so a future reader greps `score-live` and
+  // finds both ends.
+  await ensureDir(join(DIST_DIR, '_internal'));
+  await writeFile(join(DIST_DIR, '_internal', 'score-live-shell.html'), emitShellTemplate({ themeInitJs: themeInit }));
 
   // 10. Sitemap (includes scorecard paths). /install (CLI) and /skill (skill
   // bundle) are indexed for humans; /skill.json carries X-Robots-Tag: noindex
diff --git a/src/build/registry-index.mjs b/src/build/registry-index.mjs
index be7e3a8..10986e8 100644
--- a/src/build/registry-index.mjs
+++ b/src/build/registry-index.mjs
@@ -1,12 +1,12 @@
-// Build-time indexes for the live-scoring path (plan U1):
+// Build-time indexes for the live-scoring path:
 //
-// - dist/registry-index.json: dual-keyed (slug, owner/repo) lookup of every
-//   committed-scorecard tool. Powers U4's registry-fast-path so the Worker
-//   does O(1) lookups whether the input was a slug or a GitHub URL.
+// - dist/registry-index.json: dual-keyed (slug, owner/repo) lookup of
+//   every committed-scorecard tool. Powers the Worker's registry-fast-
+//   path with O(1) lookups whether the input was a slug or a GitHub URL.
 // - dist/discovery-hints-index.json: owner/repo -> {pm, package, binary}
 //   hints for tools the discovery chain would otherwise bounce due to
-//   incomplete or non-canonical ecosystem metadata. Powers U4's step 0.5
-//   (per Pre-Implementation Validation gate finding F1).
+//   incomplete or non-canonical ecosystem metadata. Powers the hint
+//   short-circuit at the front of the discovery chain.
 //
 // Pure data emit; no network, no side effects beyond writeFile.
 
@@ -14,10 +14,10 @@ import { readFile, writeFile } from 'node:fs/promises';
 import { join } from 'node:path';
 import yaml from 'js-yaml';
 
-// Mirrors U4's parse-install.ts table (plan lines 1092-1103). Adding a new
-// pm here requires a matching parser entry; keeping these in sync is the
-// typo guard. `direct` is reserved for URL-paste paths (step 1 of U4) and
-// is not a valid hint pm — hints always name an ecosystem package.
+// Mirrors parse-install.ts's pm table. Adding a new pm here requires a
+// matching parser entry; keeping these in sync is the typo guard.
+// `direct` is reserved for URL-paste paths and is not a valid hint pm —
+// hints always name an ecosystem package.
 export const KNOWN_PM = new Set(['brew', 'cargo-binstall', 'bun', 'pip', 'npm', 'go']);
 
 const OWNER_REPO_RE = /^[^/]+\/[^/]+$/;
@@ -31,7 +31,7 @@ export function deriveOwnerRepo(tool) {
   return null;
 }
 
-function projectRegistryEntry(tool) {
+function projectRegistryEntry(tool, enrichment) {
   const out = {
     name: tool.name,
     binary: tool.binary,
@@ -39,15 +39,33 @@ function projectRegistryEntry(tool) {
   };
   if (tool.audit_profile) out.audit_profile = tool.audit_profile;
   if (tool.repo) out.repo = tool.repo;
+  // The registry-fast-path response carries the latest scorecard's
+  // version + anc_version + URL so the Worker can build the response
+  // triad (spec_version + anc_version + checker_url) and route the user
+  // to /score/<slug> without fetching the scorecard JSON. Also carry
+  // score_pct so the registry_hit envelope can show a "Curated - N% pass
+  // rate" reward inline on the homepage form without a second round-trip.
+  if (enrichment) {
+    if (enrichment.version) out.version = enrichment.version;
+    if (enrichment.anc_version) out.anc_version = enrichment.anc_version;
+    if (enrichment.scorecard_url) out.scorecard_url = enrichment.scorecard_url;
+    if (typeof enrichment.score_pct === 'number') out.score_pct = enrichment.score_pct;
+  }
   return out;
 }
 
-export function buildRegistryIndex(registry) {
+/**
+ * @param {Array<object>} registry
+ * @param {Record<string, { version?: string, anc_version?: string, scorecard_url?: string }>} [enrichments]
+ *   Per-tool-name lookup of scored-build metadata. Tools without an entry
+ *   here still appear in the index (no scorecard committed yet).
+ */
+export function buildRegistryIndex(registry, enrichments = {}) {
   const by_slug = {};
   const by_owner_repo = {};
   const warnings = [];
   for (const tool of registry) {
-    const projected = projectRegistryEntry(tool);
+    const projected = projectRegistryEntry(tool, enrichments[tool.name]);
     by_slug[tool.name] = projected;
     const ownerRepo = deriveOwnerRepo(tool);
     if (!ownerRepo) {
@@ -109,8 +127,8 @@ export async function loadDiscoveryHints(hintsPath) {
   return hints;
 }
 
-export async function emitBuildIndexes({ registry, hintsPath, distDir }) {
-  const { index: registryIndex, warnings: rWarnings } = buildRegistryIndex(registry);
+export async function emitBuildIndexes({ registry, hintsPath, distDir, enrichments }) {
+  const { index: registryIndex, warnings: rWarnings } = buildRegistryIndex(registry, enrichments);
   const hints = await loadDiscoveryHints(hintsPath);
   const { index: hintsIndex, warnings: hWarnings } = buildDiscoveryHintsIndex(hints, registryIndex);
 
diff --git a/src/build/scorecards-render.mjs b/src/build/scorecards-render.mjs
index 9a5c36c..62385d5 100644
--- a/src/build/scorecards-render.mjs
+++ b/src/build/scorecards-render.mjs
@@ -2,7 +2,14 @@
 // per-tool scorecard pages. Template concern only; data loading and
 // scoring live in scorecards.mjs.
 
-import { BONUS_GROUPS, escHtml, PRINCIPLE_GROUPS, PRINCIPLE_NAMES } from './util.mjs';
+import {
+  BONUS_GROUPS,
+  escHtml,
+  formatCheckTableMarkdownLines,
+  groupToPrincipleNum,
+  PRINCIPLE_GROUPS,
+  PRINCIPLE_NAMES,
+} from '../shared/scorecard-format.mjs';
 
 // Display-only mirror of the CLI's badge eligibility floor (80%). All
 // eligibility decisions read `scorecard.badge.eligible` (canonical source
@@ -12,15 +19,8 @@ import { BONUS_GROUPS, escHtml, PRINCIPLE_GROUPS, PRINCIPLE_NAMES } from './util
 // scorecard.badge.eligible directly.
 const BADGE_FLOOR_DISPLAY_PCT = 80;
 
-/**
- * Map a check group string to a principle number (1-7) or null for bonus groups.
- * @param {string} group
- * @returns {number | null}
- */
-function groupToPrincipleNum(group) {
-  const match = group.match(/^P(\d+)$/);
-  return match ? Number(match[1]) : null;
-}
+// groupToPrincipleNum lives in src/shared/scorecard-format.mjs (single source
+// of truth shared with the Worker). Imported above.
 
 // Evidence prefix the CLI emits for any check suppressed by `--audit-profile`.
 // Mirrors `SUPPRESSION_EVIDENCE_PREFIX` in agentnative/src/principles/registry.rs
@@ -116,10 +116,10 @@ function renderCheckRows(checks) {
 export function buildLeaderboardBody(leaderboard, methodology) {
   const tierBadge = (tier) => `<span class="tier-badge tier-badge--${escHtml(tier)}">${escHtml(tier)}</span>`;
 
-  // Post-U3 inversion: every leaderboard entry has a scorecard (registry
-  // entries without scorecards are excluded by loadScoredTools). The em-dash
-  // "—" / "—/7" cells the pre-inversion code carried for unscored rows are
-  // gone with the unscored row itself. Score read directly from schema 0.5
+  // Every leaderboard entry has a scorecard (registry entries without
+  // scorecards are excluded by loadScoredTools). The em-dash "—" / "—/7"
+  // cells the pre-inversion code carried for unscored rows are gone with
+  // the unscored row itself. Score read directly from schema 0.5
   // `badge.score_pct` — the CLI is canonical for the integer.
   const scoreCell = (entry) => {
     const pct = entry.scorecard.badge.score_pct;
@@ -155,8 +155,8 @@ export function buildLeaderboardBody(leaderboard, methodology) {
   // Eligible-tool count for the badge callout. Reads scorecard.badge.eligible
   // (schema 0.5) — the CLI is canonical for what eligibility means. Lets the
   // callout cite a real number ("24 tools currently qualify") instead of a
-  // vague "tools that qualify." Post-U3 every leaderboard entry has a
-  // scorecard, so no null guard needed.
+  // vague "tools that qualify." Every leaderboard entry has a scorecard,
+  // so no null guard needed.
   const eligibleCount = leaderboard.filter((e) => e.scorecard.badge.eligible).length;
   const floorPct = BADGE_FLOOR_DISPLAY_PCT;
 
@@ -590,7 +590,7 @@ export function buildLeaderboardMarkdown(leaderboard) {
   ];
 
   for (const entry of leaderboard) {
-    // Post-U3: every leaderboard entry has a scorecard.
+    // Every leaderboard entry has a scorecard at this point.
     const score = `${entry.scorecard.badge.score_pct}%`;
     const ps = entry.principleScore;
     const principles = `${ps.met}/${ps.total}`;
@@ -647,13 +647,13 @@ export function buildScorecardMarkdown(tool, scorecard, _topIssues, principleSco
     lines.push('');
   }
 
-  // Check results table
-  lines.push('| Status | Check | Principle | Evidence |');
-  lines.push('|--------|-------|-----------|----------|');
-  for (const check of scorecard.results) {
-    const pNum = groupToPrincipleNum(check.group);
-    const groupLabel = pNum ? `[${check.group}](/p${pNum})` : check.group;
-    lines.push(`| ${check.status.toUpperCase()} | ${check.label} | ${groupLabel} | ${check.evidence || ''} |`);
+  // Check results table — formatted by the shared row helper so the
+  // /score/<tool>.md and /live-score/<binary>.md surfaces stay in lockstep.
+  // Empty `baseUrl` produces site-relative links (`/p3`); the build's
+  // absolutifyMarkdownLinks pass rewrites those to absolute anc.dev URLs
+  // for the twin output (matches the other markdown pages in this file).
+  for (const row of formatCheckTableMarkdownLines(scorecard.results)) {
+    lines.push(row);
   }
   lines.push('');
 
diff --git a/src/build/scorecards.mjs b/src/build/scorecards.mjs
index 196235b..9596c78 100644
--- a/src/build/scorecards.mjs
+++ b/src/build/scorecards.mjs
@@ -54,8 +54,9 @@ export async function loadRegistry(registryPath) {
     throw new Error('registry.yaml: expected top-level "tools" array');
   }
 
-  // Binary-name collision guard (U7 redirects): for tools where binary !==
-  // name, the binary slug must not appear as ANY other tool's `name`.
+  // Binary-name collision guard for `/score/<binary>` redirects: for tools
+  // where binary !== name, the binary slug must not appear as ANY other
+  // tool's `name`.
   // Without this, a future registry addition `name: rg, binary: rg` would
   // silently overwrite the `/score/rg` redirect page that ripgrep emits, or
   // vice versa. Build the binary set first so we can detect collisions in
@@ -78,6 +79,11 @@ export async function loadRegistry(registryPath) {
     if (t.name === 'scorecards') {
       throw new Error('registry.yaml: "scorecards" is reserved — slug collision with the leaderboard page');
     }
+    if (t.name === 'live') {
+      throw new Error(
+        'registry.yaml: "live" is reserved — slug collision with the /score/live/<binary> dynamic share-URL namespace',
+      );
+    }
     if (seen.has(t.name)) {
       throw new Error(`registry.yaml: duplicate name "${t.name}"`);
     }
@@ -163,7 +169,7 @@ function indexScorecardsByName(filenames) {
 /**
  * Discover scorecards on disk and join each to its registry editorial entry.
  *
- * Iteration is **scorecard-driven** (post-U3 inversion): the build reads
+ * Iteration is **scorecard-driven**: the build reads
  * `<name>-v*.json` from the scorecards/ directory, picks the highest version
  * per slug, and joins to `registry.tools[name=slug]` for editorial fields
  * (tier, language, creator, description, install, repo/url).
@@ -178,10 +184,10 @@ function indexScorecardsByName(filenames) {
  *     `name` on disk. Excluded from the leaderboard. Supports
  *     editorial-PR-first contribution flow.
  *
- * The orchestrator logs both lists; CI surfaces them as a PR comment (U8).
- * R5(b)'s structural invariant — "every scorecard's filename slug must
- * match a registry entry" — is intentionally NOT enforced here; it lives
- * in `runScorecardInvariants()`. Splitting the contracts lets a contributor
+ * The orchestrator logs both lists; CI surfaces them as a PR comment.
+ * The structural invariant — "every scorecard's filename slug must match
+ * a registry entry" — is intentionally NOT enforced here; it lives in
+ * `runScorecardInvariants()`. Splitting the contracts lets a contributor
  * land a scorecard PR + editorial PR in either order without the build
  * blowing up mid-merge.
  *
@@ -438,9 +444,9 @@ export function extractTopIssues(scorecard, limit = 3) {
 }
 
 /**
- * Sort tools by primary score descending. Post-U3 inversion every tool has
- * a scorecard; the unscored-tools-sort-to-bottom branch is gone with the
- * pre-inversion code path that allowed null scorecards.
+ * Sort tools by primary score descending. Every tool has a scorecard, so
+ * the unscored-tools-sort-to-bottom branch is gone with the pre-inversion
+ * code path that allowed null scorecards.
  *
  * @param {Array<{ tool: object, scorecard: object }>} tools
  * @returns {Array<{ tool: object, scorecard: object, rank: number, principleScore: object }>}
diff --git a/src/build/shell.mjs b/src/build/shell.mjs
index db30a91..e019ef3 100644
--- a/src/build/shell.mjs
+++ b/src/build/shell.mjs
@@ -55,6 +55,23 @@ const AI_PROVIDERS = [
   },
 ];
 
+// Official GitHub mark (Simple Icons). currentColor + aria-hidden so the
+// SVG inherits link color and screen readers fall through to the link
+// text ("spec", "cli", etc.).
+const GITHUB_SVG =
+  '<svg viewBox="0 0 24 24" fill="currentColor" aria-hidden="true"><path d="M12 .297c-6.63 0-12 5.373-12 12 0 5.303 3.438 9.8 8.205 11.385.6.113.82-.258.82-.577 0-.285-.01-1.04-.015-2.04-3.338.724-4.042-1.61-4.042-1.61C4.422 18.07 3.633 17.7 3.633 17.7c-1.087-.744.084-.729.084-.729 1.205.084 1.838 1.236 1.838 1.236 1.07 1.835 2.809 1.305 3.495.998.108-.776.417-1.305.76-1.605-2.665-.3-5.466-1.332-5.466-5.93 0-1.31.465-2.38 1.235-3.22-.135-.303-.54-1.523.105-3.176 0 0 1.005-.322 3.3 1.23.96-.267 1.98-.399 3-.405 1.02.006 2.04.138 3 .405 2.28-1.552 3.285-1.23 3.285-1.23.645 1.653.24 2.873.12 3.176.765.84 1.23 1.91 1.23 3.22 0 4.61-2.805 5.625-5.475 5.92.42.36.81 1.096.81 2.22 0 1.606-.015 2.896-.015 3.286 0 .315.21.69.825.57C20.565 22.092 24 17.592 24 12.297c0-6.627-5.373-12-12-12"/></svg>';
+
+// Source-of-truth repos linked from the footer. Order: spec first (the
+// canonical SoT), then the three channel implementations in increasing
+// audience reach (cli for tool authors, site for visitors, skill for
+// agents). Names match the repo slugs on github.com/brettdavies.
+const SOURCE_REPOS = [
+  { name: 'spec', url: 'https://github.com/brettdavies/agentnative' },
+  { name: 'cli', url: 'https://github.com/brettdavies/agentnative-cli' },
+  { name: 'site', url: 'https://github.com/brettdavies/agentnative-site' },
+  { name: 'skill', url: 'https://github.com/brettdavies/agentnative-skill' },
+];
+
 const esc = escHtml;
 
 /**
@@ -70,6 +87,35 @@ const esc = escHtml;
  * @param {string=} args.baseUrl             — absolute base (default prod).
  * @returns {string} full HTML document.
  */
+/**
+ * Emit a placeholder-only version of the shell. Used by the Worker to
+ * render dynamic pages (/score/live/<binary>) without duplicating the
+ * shell layout. The template has four placeholders:
+ *
+ *   {{TITLE}}            — document <title> + og:title (escaped at substitution)
+ *   {{DESCRIPTION}}      — meta description + og:description
+ *   {{CANONICAL_PATH}}   — site-relative canonical path (no trailing extension)
+ *   {{BODY}}             — already-rendered body HTML (pre-escaped by caller)
+ *
+ * Same shell layout as the static pages; the only difference is the
+ * placeholders for the four dynamic fields. The markdown-twin link in
+ * the footer substitutes to `{{CANONICAL_PATH}}.md` so live-score pages
+ * carry the same markdown-twin affordance as every other page.
+ */
+export function emitShellTemplate({ themeInitJs, baseUrl } = {}) {
+  return emitShell({
+    title: '{{TITLE}}',
+    description: '{{DESCRIPTION}}',
+    canonicalPath: '{{CANONICAL_PATH}}',
+    bodyHtml: '{{BODY}}',
+    themeInitJs: themeInitJs ?? '',
+    isIndex: false,
+    principles: [],
+    baseUrl,
+    extraScripts: [],
+  });
+}
+
 export function emitShell({
   title,
   description,
@@ -85,18 +131,33 @@ export function emitShell({
   const canonical = base + canonicalPath;
   const ogImage = `${base}/og-image.png`;
 
+  const orgId = `${base}/#organization`;
   const jsonLd = {
     '@context': 'https://schema.org',
-    '@type': 'TechArticle',
-    headline: title,
-    description,
-    url: canonical,
-    image: ogImage,
-    publisher: {
-      '@type': 'Organization',
-      name: SITE_NAME,
-      url: base,
-    },
+    '@graph': [
+      {
+        '@type': 'Organization',
+        '@id': orgId,
+        name: SITE_NAME,
+        url: base,
+        logo: `${base}/apple-touch-icon-180.png`,
+        sameAs: SOURCE_REPOS.map((r) => r.url),
+      },
+      {
+        '@type': 'TechArticle',
+        headline: title,
+        description,
+        url: canonical,
+        image: ogImage,
+        author: {
+          '@type': 'Person',
+          name: 'Brett Davies',
+          url: 'https://github.com/brettdavies',
+          sameAs: ['https://x.com/brettdavies'],
+        },
+        publisher: { '@id': orgId },
+      },
+    ],
   };
 
   const miniToc =
@@ -120,6 +181,7 @@ ${principles
     <title>${esc(title)}</title>
     <meta name="description" content="${esc(description)}" />
     <link rel="canonical" href="${canonical}" />
+${isIndex ? `    <meta name="turnstile-sitekey" content="{{TURNSTILE_SITEKEY}}" />\n` : ''}
 
     <meta property="og:type" content="article" />
     <meta property="og:title" content="${esc(title)}" />
@@ -160,9 +222,11 @@ ${principles
         <a href="/scorecards">Leaderboard</a>
         <a href="/install">Install</a>
         <a href="/check">Check your CLI</a>
+        <a href="/skill">Skill</a>
         <a href="/methodology">Methodology</a>
         <a href="/coverage">Coverage</a>
         <a href="/about">About</a>
+        <a href="/contribute">Contribute</a>
       </nav>
       <div class="theme-toggle" role="group" aria-label="Theme">
         <button type="button" data-theme-set="light" aria-pressed="false">Light</button>
@@ -184,6 +248,14 @@ ${AI_PROVIDERS.map(
 ).join('\n')}
         </div>
       </div>
+      <p class="site-footer__source">
+        <span class="site-footer__source-icon" aria-hidden="true">${GITHUB_SVG}</span>
+        <span class="site-footer__source-label">Source:</span>
+${SOURCE_REPOS.map(
+  (r, i) =>
+    `        ${i > 0 ? '<span aria-hidden="true"> · </span>\n        ' : ''}<a href="${r.url}" rel="noopener noreferrer">${r.name}</a>`,
+).join('\n')}
+      </p>
       <p class="site-footer__meta">
         <span>${SITE_NAME}</span>
         <span> · </span>
diff --git a/src/build/util.mjs b/src/build/util.mjs
index 4641923..55c8c10 100644
--- a/src/build/util.mjs
+++ b/src/build/util.mjs
@@ -43,35 +43,16 @@ export function parseFilename(filename) {
   return { n: Number(match[1]), slug: match[2] };
 }
 
-/**
- * Escape HTML special characters in a string.
- * @param {string} s
- * @returns {string}
- */
-export function escHtml(s) {
-  return String(s).replace(
-    /[<>&"']/g,
-    (c) => ({ '<': '&lt;', '>': '&gt;', '&': '&amp;', '"': '&quot;', "'": '&#39;' })[c],
-  );
-}
-
-// -------------------------------------------------------------------
-// Shared constants (STAR — single authoritative source)
-// -------------------------------------------------------------------
-
-export const PRINCIPLE_NAMES = {
-  P1: 'Non-Interactive by Default',
-  P2: 'Structured, Parseable Output',
-  P3: 'Progressive Help Discovery',
-  P4: 'Fail-Fast, Actionable Errors',
-  P5: 'Safe Retries & Mutation Boundaries',
-  P6: 'Composable, Predictable Command Structure',
-  P7: 'Bounded, High-Signal Responses',
-};
-
-export const PRINCIPLE_GROUPS = Object.keys(PRINCIPLE_NAMES);
-
-export const BONUS_GROUPS = ['CodeQuality', 'ProjectStructure'];
+// Worker-safe primitives + constants live in `src/shared/scorecard-format.mjs`
+// so both the build (Node) and the Worker (Cloudflare runtime) can import
+// them without dragging in this file's fs.readFileSync calls. Re-exported
+// here for backward compat with existing build-side callers.
+export {
+  BONUS_GROUPS,
+  escHtml,
+  PRINCIPLE_GROUPS,
+  PRINCIPLE_NAMES,
+} from '../shared/scorecard-format.mjs';
 
 // =====================================================================
 // Spec version constants — three distinct concepts, three distinct files.
diff --git a/src/client/live-score.ts b/src/client/live-score.ts
new file mode 100644
index 0000000..f469353
--- /dev/null
+++ b/src/client/live-score.ts
@@ -0,0 +1,612 @@
+// Homepage live-scoring form — paste-input, lazy-loaded Turnstile, 2 s
+// theater floor, redirect on success.
+//
+// Behavior contract:
+//   - Turnstile script (https://challenges.cloudflare.com/turnstile/v0/api.js)
+//     is NOT loaded on every homepage visit. Lazy-load on first focus/click/
+//     paste against the form input. A Playwright regression asserts the
+//     script is NOT requested when the user scrolls past without engaging.
+//   - On submit: render the invisible Turnstile widget, await token, POST
+//     to /api/score with {input, turnstile_token}. Promise.all with a 2 s
+//     timer enforces the cached-theater minimum from
+//     docs/solutions/architecture-patterns/cached-theater-live-fallback-2026-04-17.md.
+//   - Response branches:
+//       kind=='registry_hit'  → window.location = scorecard_url
+//       inline scorecard      → window.location = share_url (/score/live/<binary>)
+//       4xx with chain_*       → render class-specific bounce panel
+//       other errors          → inline error message
+//
+// Turnstile sitekey comes from <meta name="turnstile-sitekey" content="...">,
+// substituted at request time by the Worker. Production pre-promotion ships
+// empty content — the form disables itself with a "not yet live" notice
+// rather than rendering a non-functional widget.
+
+interface TurnstileApi {
+  render(
+    element: HTMLElement | string,
+    options: {
+      sitekey: string;
+      size?: 'invisible' | 'normal' | 'compact';
+      callback?: (token: string) => void;
+      'error-callback'?: () => void;
+      'expired-callback'?: () => void;
+    },
+  ): string;
+  execute(widgetId?: string): void;
+  reset(widgetId?: string): void;
+  remove(widgetId?: string): void;
+}
+
+declare global {
+  interface Window {
+    turnstile?: TurnstileApi;
+  }
+}
+
+const TURNSTILE_SCRIPT_URL = 'https://challenges.cloudflare.com/turnstile/v0/api.js?render=explicit';
+const THEATER_MIN_MS = 2000;
+const STDERR_TRUNCATE_CHARS = 300;
+
+const form = document.querySelector<HTMLFormElement>('[data-live-score-form]');
+const input = document.querySelector<HTMLInputElement>('#live-score-input');
+const submitBtn = document.querySelector<HTMLButtonElement>('[data-live-score-submit]');
+const statusEl = document.querySelector<HTMLParagraphElement>('[data-live-score-status]');
+
+if (form && input && submitBtn && statusEl) {
+  initLiveScore({ form, input, submitBtn, statusEl });
+}
+
+function initLiveScore(els: {
+  form: HTMLFormElement;
+  input: HTMLInputElement;
+  submitBtn: HTMLButtonElement;
+  statusEl: HTMLParagraphElement;
+}): void {
+  const sitekey = readSitekey();
+  if (!sitekey) {
+    // Production pre-promotion path: TURNSTILE_SITEKEY var is not set.
+    // Disable the form so a click can't dispatch a request that will
+    // fail siteverify with no actionable error.
+    disableFormWithMessage(els, 'Live scoring is available on staging only — install anc locally to score.');
+    return;
+  }
+
+  let turnstilePromise: Promise<TurnstileApi> | null = null;
+  let widgetId: string | null = null;
+
+  function ensureTurnstileLoaded(): Promise<TurnstileApi> {
+    if (turnstilePromise) return turnstilePromise;
+    turnstilePromise = new Promise<TurnstileApi>((resolve, reject) => {
+      const script = document.createElement('script');
+      script.src = TURNSTILE_SCRIPT_URL;
+      script.async = true;
+      script.defer = true;
+      script.onload = () => {
+        if (window.turnstile) resolve(window.turnstile);
+        else reject(new Error('Turnstile failed to attach to window'));
+      };
+      script.onerror = () => reject(new Error('Turnstile script failed to load'));
+      document.head.appendChild(script);
+    }).catch((err) => {
+      // Reset on failure so the next interaction retries — common cause
+      // is a transient network blip on first paint.
+      turnstilePromise = null;
+      throw err;
+    });
+    return turnstilePromise;
+  }
+
+  // Lazy-load: first interaction wins. Once the script is in-flight we
+  // don't re-add it on subsequent events.
+  const lazyLoad = () => {
+    void ensureTurnstileLoaded();
+  };
+  els.input.addEventListener('focus', lazyLoad, { once: true });
+  els.input.addEventListener('paste', lazyLoad, { once: true });
+  els.input.addEventListener('click', lazyLoad, { once: true });
+
+  // bfcache restore: when the user browser-backs from /score/live/<binary>
+  // (or any successor page) into this homepage, the browser may restore
+  // the page from the back-forward cache with the form still in its
+  // submitting state — input + button disabled, status slot showing the
+  // curated-reward or phase-progression text from the previous submit.
+  // Reset to a clean state so the form is immediately usable again.
+  // Standard a11y pattern, no copy change needed.
+  window.addEventListener('pageshow', (event) => {
+    if (!event.persisted) return;
+    setSubmitting(els, false);
+    clearStatus(els.statusEl);
+  });
+
+  // Example chips fill the input + trigger the lazy-load (since the user
+  // is clearly engaging with the form). Mirrors the paste interaction.
+  for (const chip of document.querySelectorAll<HTMLButtonElement>('[data-live-score-example]')) {
+    chip.addEventListener('click', () => {
+      const value = chip.dataset.liveScoreExample ?? '';
+      els.input.value = value;
+      els.input.focus();
+      lazyLoad();
+    });
+  }
+
+  els.form.addEventListener('submit', async (event) => {
+    event.preventDefault();
+    const value = els.input.value.trim();
+    if (!value) {
+      renderInlineError(els.statusEl, 'Paste a tool name, install command, or GitHub URL.');
+      return;
+    }
+
+    setSubmitting(els, true);
+    renderStatus(els.statusEl, 'Queued…');
+
+    let token: string;
+    try {
+      token = await acquireTurnstileToken(sitekey, await ensureTurnstileLoaded(), els.form, (id) => {
+        widgetId = id;
+      });
+    } catch {
+      setSubmitting(els, false);
+      renderInlineError(els.statusEl, 'Verification challenge failed to load. Please try again.');
+      return;
+    }
+
+    // Phase progression: while we wait for /api/score, the status line
+    // cycles through realistic prose phases. Timings approximate real
+    // sandbox runs — too long is misleading, too short reads as frantic.
+    // The cycle is cancelled the moment the response arrives so the user
+    // never sees an obviously-stale phase after the work is done.
+    const phaseTimer = startPhaseProgression(els.statusEl);
+    const start = Date.now();
+
+    let response: Response;
+    let payload: Record<string, unknown>;
+    try {
+      response = await fetch('/api/score', {
+        method: 'POST',
+        headers: { 'content-type': 'application/json' },
+        body: JSON.stringify({ input: value, turnstile_token: token }),
+      });
+      payload = await response.json().catch(() => ({}) as Record<string, unknown>);
+    } catch (err) {
+      phaseTimer.cancel();
+      setSubmitting(els, false);
+      renderInlineError(els.statusEl, networkErrorMessage(err));
+      return;
+    } finally {
+      if (widgetId && window.turnstile) {
+        window.turnstile.reset(widgetId);
+      }
+    }
+
+    phaseTimer.cancel();
+
+    // Curated-hit reward: when /api/score short-circuits via the registry-
+    // fast-path (slug, install-command-with-curated-binary, or github-url
+    // matching a curated owner/repo), surface a small "you found one of
+    // ours" moment before the redirect. The reward shows for the
+    // remainder of the 2 s theater floor — long enough to read, not long
+    // enough to annoy. score_pct is enriched into the registry-index at
+    // build time (build.mjs + registry-index.mjs), so this needs no
+    // second round-trip.
+    const sc = payload.scorecard as { kind?: string; scorecard_url?: string; score_pct?: number | null } | undefined;
+    if (response.status === 200 && sc?.kind === 'registry_hit' && typeof sc.scorecard_url === 'string') {
+      const reward =
+        typeof sc.score_pct === 'number'
+          ? `Curated · ${sc.score_pct}% pass rate · opening the audited scorecard…`
+          : 'Curated · opening the audited scorecard…';
+      renderCuratedReward(els.statusEl, reward);
+      const elapsed = Date.now() - start;
+      const remaining = Math.max(0, THEATER_MIN_MS - elapsed);
+      window.setTimeout(() => {
+        window.location.href = sc.scorecard_url as string;
+      }, remaining);
+      return;
+    }
+
+    // All other branches (inline scorecard, error, bounce) flow through
+    // the existing handler. Honor the theater floor first so a fast
+    // cached response doesn't snap the user through before they've
+    // registered that anything happened.
+    const elapsed = Date.now() - start;
+    const remaining = Math.max(0, THEATER_MIN_MS - elapsed);
+    if (remaining > 0) await new Promise((r) => window.setTimeout(r, remaining));
+    setSubmitting(els, false);
+    handleScoreResponse(els, response.status, payload);
+  });
+}
+
+function readSitekey(): string | null {
+  const meta = document.querySelector<HTMLMetaElement>('meta[name=turnstile-sitekey]');
+  const value = meta?.content?.trim();
+  // Empty string (production pre-promotion) is treated as absent, so the
+  // form short-circuits to the local-install copy without a network round-trip.
+  return value ? value : null;
+}
+
+function acquireTurnstileToken(
+  sitekey: string,
+  api: TurnstileApi,
+  formEl: HTMLFormElement,
+  onWidget: (id: string) => void,
+): Promise<string> {
+  return new Promise((resolve, reject) => {
+    let container = formEl.querySelector<HTMLDivElement>('[data-turnstile-mount]');
+    if (!container) {
+      container = document.createElement('div');
+      container.setAttribute('data-turnstile-mount', '');
+      container.style.cssText = 'position:absolute;left:-9999px;width:0;height:0;overflow:hidden';
+      formEl.appendChild(container);
+    }
+    const id = api.render(container, {
+      sitekey,
+      size: 'invisible',
+      callback: (token: string) => resolve(token),
+      'error-callback': () => reject(new Error('turnstile_error')),
+      'expired-callback': () => reject(new Error('turnstile_expired')),
+    });
+    onWidget(id);
+    api.execute(id);
+  });
+}
+
+function handleScoreResponse(
+  els: { statusEl: HTMLParagraphElement },
+  status: number,
+  payload: Record<string, unknown>,
+): void {
+  // Registry hit: the curated /score/<slug> page is the share surface.
+  const scorecard = payload.scorecard as { kind?: string; scorecard_url?: string } | undefined;
+  if (status === 200 && scorecard?.kind === 'registry_hit' && typeof scorecard.scorecard_url === 'string') {
+    window.location.href = scorecard.scorecard_url;
+    return;
+  }
+
+  // Inline scorecard: redirect to the shareable /live-score/<binary> page.
+  if (status === 200 && typeof payload.share_url === 'string') {
+    window.location.href = payload.share_url;
+    return;
+  }
+
+  // 200 but no share_url AND no registry_hit redirect (github-url-without-hint
+  // live run). Show a fallback message — the user got a result, but the
+  // shareable URL surface isn't available for this input shape.
+  if (status === 200) {
+    renderInlineError(
+      els.statusEl,
+      "Scored, but this input doesn't have a shareable result URL yet. Run anc locally for a saved scorecard.",
+    );
+    return;
+  }
+
+  // 4xx / 5xx: branch on error.code for the three bounce panels + the
+  // common error tags. Anything else falls through to a generic message.
+  const err = payload.error as { code?: string; details?: string; retry_after?: number; pm?: string } | undefined;
+
+  // Issue-1 reclassification: the DO currently returns
+  // `chain_resolved_install_failed` even when the package manager couldn't
+  // find the package at all (cargo: "X is not found"; brew: "No available
+  // formula"; pip: "No matching distribution"). In those cases no install
+  // path was ever resolved — the "didn't run" headline is misleading.
+  // Detect the pattern in stderr and render a registry-not-found bounce
+  // instead. Backend follow-up: the DO classifier should emit
+  // `chain_no_resolve` (or a new `chain_resolved_package_not_found`)
+  // directly so the client doesn't have to second-guess.
+  if (err?.code === 'chain_resolved_install_failed' && isPackageNotFoundStderr(err.details)) {
+    renderBouncePanel(els.statusEl, {
+      headline: "That package isn't in the registry.",
+      body: 'The package manager couldn\'t find a package by that name. Check the spelling, or paste a GitHub URL if the project ships releases there. <a href="/install">Install anc locally</a> to score private or unpublished tools.',
+      details: truncateStderr(err.details),
+    });
+    return;
+  }
+
+  switch (err?.code) {
+    case 'chain_no_resolve':
+      renderBouncePanel(els.statusEl, {
+        headline: "We couldn't find a pre-built binary for that.",
+        body: 'anc only scores tools with a published binary release. <a href="/install">Install anc locally</a> to score source + project depth.',
+      });
+      return;
+    case 'github_repo_not_accessible':
+      renderBouncePanel(els.statusEl, {
+        headline: "GitHub couldn't find that repo.",
+        body: 'It may be private, renamed, or never existed. <a href="/install">Install anc locally</a> to score private repos directly — the live sandbox has no GitHub credentials.',
+      });
+      return;
+    case 'chain_resolved_install_failed':
+      renderBouncePanel(els.statusEl, {
+        headline: "Found an install path, but it didn't run.",
+        body: 'The install command returned a non-zero exit. <a href="/install">Install anc locally</a> for more flexible install options.',
+        details: truncateStderr(err?.details),
+      });
+      return;
+    case 'chain_resolved_no_binary_produced':
+      // Two distinct shapes land under this code:
+      //   - "Archive contains no binary named ..." — the direct-install
+      //     auto-detect filter found zero executable candidates after
+      //     stripping docs/manifests. The archive shipped only docs OR
+      //     all candidates failed the path/extension guard. Render the
+      //     archive-specific bounce so the user can see the file list.
+      //   - Otherwise — a registry install succeeded but no entry point
+      //     ended up on PATH (pallets/click-class library miss).
+      if (isArchiveNoBinaryDetails(err?.details)) {
+        renderBouncePanel(els.statusEl, {
+          headline: "The archive doesn't contain the binary we expected.",
+          body: 'The release ships files but no executable that matches our auto-detector. <a href="/install">Install anc locally</a> to score this tool directly — the auto-detector picks the most-likely binary, but humans pick better.',
+          details: truncateStderr(err?.details),
+        });
+        return;
+      }
+      renderBouncePanel(els.statusEl, {
+        headline: 'That looks like a library, not a CLI.',
+        body: 'We installed it, but no command-line entry point appeared on PATH. anc only scores binaries. If this is wrong, paste the actual binary name as <code>&lt;command&gt;</code> to retry. <a href="/install">Install anc locally</a> for full project depth.',
+      });
+      return;
+    case 'install_unsupported':
+      renderInstallUnsupportedBounce(els.statusEl, err?.pm);
+      return;
+    case 'rate_limited': {
+      const retry = err?.retry_after ?? 60;
+      renderInlineError(els.statusEl, `Too many requests. Try again in ${retry}s.`);
+      return;
+    }
+    case 'turnstile_failed':
+      renderInlineError(els.statusEl, 'Verification failed. Please try again.');
+      return;
+    case 'scoring_disabled':
+      renderInlineError(els.statusEl, 'Live scoring is paused. Run anc locally — see the install copy above.');
+      return;
+    case 'sandbox_stub_until_u6':
+      renderInlineError(
+        els.statusEl,
+        'Live scoring is still rolling out for this input shape. Run anc locally for the full check.',
+      );
+      return;
+    case 'non_github_host':
+      renderInlineError(els.statusEl, 'Only public GitHub repos are supported.');
+      return;
+    case 'discovery_redirect_loop':
+      renderInlineError(
+        els.statusEl,
+        'GitHub redirected us in a loop while resolving releases. Try again, or paste the exact owner/repo URL.',
+      );
+      return;
+    case 'non_https_url':
+      renderInlineError(els.statusEl, "Use https://. The scoring sandbox won't fetch http:// URLs.");
+      return;
+    case 'invalid_url_path':
+      renderInlineError(
+        els.statusEl,
+        'Paste the repo root, not a branch or release link. Example: https://github.com/owner/repo.',
+      );
+      return;
+    case 'unparseable_install_command':
+      renderInlineError(
+        els.statusEl,
+        "That looks like an install command, but the package manager isn't supported. Try cargo, brew, npm, pip, bun, uv, or go.",
+      );
+      return;
+    case 'invalid_url':
+    case 'unrecognized_input':
+      renderInlineError(els.statusEl, 'That input is not a recognized tool, install command, or GitHub URL.');
+      return;
+    case 'timeout':
+      renderInlineError(els.statusEl, 'The scan ran past the time budget. Run anc locally for unconstrained scoring.');
+      return;
+    case 'service_misconfigured':
+      renderInlineError(
+        els.statusEl,
+        "Live scoring is misconfigured on our side. We've been notified. Run anc locally for now.",
+      );
+      return;
+    case 'incomplete_response_contract':
+      renderInlineError(
+        els.statusEl,
+        'The scoring service returned an incomplete response. Try again, or run anc locally.',
+      );
+      return;
+    default:
+      renderInlineError(els.statusEl, 'Scoring failed. Please try again or run anc locally.');
+  }
+}
+
+/** Heuristic: does this stderr text indicate "the package manager could
+ * not find the package" (as opposed to "found it but install failed")?
+ * Patterns cover cargo, brew, pip, npm, pipx, go. Case-insensitive.
+ * Kept conservative — a false positive here re-labels a real install
+ * failure as a registry miss, which is less honest than the inverse. */
+function isPackageNotFoundStderr(details: string | undefined): boolean {
+  if (typeof details !== 'string' || details.length === 0) return false;
+  const haystack = details.toLowerCase();
+  return (
+    /\bis not found\b/.test(haystack) ||
+    /\bno matching (package|distribution|formula)\b/.test(haystack) ||
+    /\bcould not find\b/.test(haystack) ||
+    /\bno available formula\b/.test(haystack) ||
+    /\bunknown package\b/.test(haystack) ||
+    /\bdoes not exist\b/.test(haystack) ||
+    /\bnot found in (the )?registry\b/.test(haystack) ||
+    /\b404 not found\b/.test(haystack)
+  );
+}
+
+/** Heuristic: does this stderr text indicate "the archive extracted but
+ * contained no recognizable binary"? The direct-install path emits a
+ * specific `DETAILS:Archive contains no binary named ...` line when the
+ * auto-detect filter finds zero candidates (a release that ships only
+ * docs, or whose binary name + filename were both filtered out as
+ * non-executable). When this fires we render a more specific bounce
+ * panel than the generic install_failed one — the user sees the actual
+ * archive listing and understands why a manual hint is needed. */
+function isArchiveNoBinaryDetails(details: string | undefined): boolean {
+  if (typeof details !== 'string' || details.length === 0) return false;
+  return /\bArchive contains no binary named\b/i.test(details);
+}
+
+/** install_unsupported variant rendering. pm carries the specific install
+ * mechanism the sandbox refused; copy is tailored per pm so the user gets
+ * a concrete alternative instead of a generic "try something else". */
+function renderInstallUnsupportedBounce(statusEl: HTMLParagraphElement, pm: string | undefined): void {
+  switch (pm) {
+    case 'brew':
+    case 'brew_only':
+      renderBouncePanel(statusEl, {
+        headline: "Homebrew installs aren't sandboxed yet.",
+        body: 'Homebrew isn\'t available in the scoring sandbox. Try a <code>cargo install</code>, <code>pipx install</code>, or <code>npm i -g</code> equivalent, or paste a GitHub URL. <a href="/install">Install anc locally</a> to score brew-only tools.',
+      });
+      return;
+    case 'bun':
+      renderBouncePanel(statusEl, {
+        headline: "`bun install` isn't sandboxed yet.",
+        body: 'The sandbox doesn\'t wire Bun\'s global install path onto PATH. Try an <code>npm i -g</code> or <code>pipx install</code> equivalent, or <a href="/install">install anc locally</a>.',
+      });
+      return;
+    case 'go_no_binary':
+      renderBouncePanel(statusEl, {
+        headline: "That Go module doesn't expose a CLI binary.",
+        body: 'anc only scores tools that produce a command on PATH. Paste a binary-producing package, or <a href="/install">install anc locally</a> to score libraries.',
+      });
+      return;
+    default:
+      renderBouncePanel(statusEl, {
+        headline: "That install path isn't supported in the sandbox.",
+        body: 'Paste a <code>cargo install</code>, <code>pipx install</code>, <code>npm i -g</code>, or GitHub URL instead, or <a href="/install">install anc locally</a>.',
+      });
+  }
+}
+
+function truncateStderr(input: unknown): string | undefined {
+  if (typeof input !== 'string' || input.length === 0) return undefined;
+  if (input.length <= STDERR_TRUNCATE_CHARS) return input;
+  return `${input.slice(0, STDERR_TRUNCATE_CHARS)}… (truncated)`;
+}
+
+function renderBouncePanel(
+  statusEl: HTMLParagraphElement,
+  panel: { headline: string; body: string; details?: string },
+): void {
+  statusEl.hidden = false;
+  statusEl.classList.add('live-score__status--bounce');
+  statusEl.classList.remove('live-score__status--error');
+  const detailsBlock = panel.details
+    ? `<pre class="live-score__bounce-stderr"><code>${escapeHtml(panel.details)}</code></pre>`
+    : '';
+  // panel.body is template-literal HTML controlled by THIS module — no
+  // user input flows into it. The headline is escaped (it's a fixed string
+  // per the closed-set bounce error codes). Stderr details are escapeHtml'd
+  // before rendering inside <code>.
+  statusEl.innerHTML = `
+    <span class="live-score__bounce-headline">${escapeHtml(panel.headline)}</span>
+    <span class="live-score__bounce-body">${panel.body}</span>
+    ${detailsBlock}
+  `;
+}
+
+function renderInlineError(statusEl: HTMLParagraphElement, message: string): void {
+  statusEl.hidden = false;
+  statusEl.classList.add('live-score__status--error');
+  statusEl.classList.remove('live-score__status--bounce');
+  statusEl.textContent = message;
+}
+
+/** Reset the status slot to its initial hidden+empty state. Used by the
+ * bfcache `pageshow` handler so a back-nav into the homepage doesn't
+ * leave stale curated-reward or phase-progression text behind. */
+function clearStatus(statusEl: HTMLParagraphElement): void {
+  statusEl.hidden = true;
+  statusEl.classList.remove('live-score__status--error', 'live-score__status--bounce', 'live-score__status--curated');
+  statusEl.textContent = '';
+}
+
+/** Show a transient in-progress message (e.g. "Scoring…") during a request.
+ * Uses the same status slot bounce panels + inline errors target, so the
+ * response render (success or failure) naturally overwrites this text. */
+function renderStatus(statusEl: HTMLParagraphElement, message: string): void {
+  statusEl.hidden = false;
+  statusEl.classList.remove('live-score__status--error', 'live-score__status--bounce', 'live-score__status--curated');
+  statusEl.textContent = message;
+}
+
+/** Show the curated-hit reward inline before redirect. Identity color via
+ * --accent in CSS so the visual cue is "this is one of ours" without a
+ * banner, badge, or animation. */
+function renderCuratedReward(statusEl: HTMLParagraphElement, message: string): void {
+  statusEl.hidden = false;
+  statusEl.classList.remove('live-score__status--error', 'live-score__status--bounce');
+  statusEl.classList.add('live-score__status--curated');
+  statusEl.textContent = message;
+}
+
+/** Phase progression while waiting on /api/score.
+ *
+ * Static "Scoring…" would say nothing about WHAT is taking time, and the
+ * brand voice ("authority through precision, engagement through detail")
+ * rewards a status line that mirrors the actual phases. The phases are a
+ * client-side approximation — real per-step polling would need a
+ * dedicated channel — but the timings approximate the median sandbox run
+ * so the text stays honest:
+ *
+ *   - Queued (until t=900 ms)
+ *   - Resolving install path (until t=2.5 s)
+ *   - Installing in sandbox (until t=18 s)
+ *   - Running anc check (until response)
+ *
+ * Cancelling the cycle when the response arrives keeps the user from
+ * ever seeing a phase that's obviously past the work. No CSS animation,
+ * no spinner — text replacement IS the indicator. */
+type PhaseTimer = { cancel: () => void };
+
+function startPhaseProgression(statusEl: HTMLParagraphElement): PhaseTimer {
+  const schedule: { atMs: number; text: string }[] = [
+    { atMs: 900, text: 'Resolving install path…' },
+    { atMs: 2500, text: 'Installing in sandbox…' },
+    { atMs: 18000, text: 'Running anc check…' },
+  ];
+  const handles: number[] = [];
+  for (const phase of schedule) {
+    handles.push(
+      window.setTimeout(() => {
+        renderStatus(statusEl, phase.text);
+      }, phase.atMs),
+    );
+  }
+  return {
+    cancel: () => {
+      for (const h of handles) window.clearTimeout(h);
+    },
+  };
+}
+
+function setSubmitting(els: { submitBtn: HTMLButtonElement; input: HTMLInputElement }, submitting: boolean): void {
+  els.submitBtn.disabled = submitting;
+  els.input.disabled = submitting;
+  els.submitBtn.textContent = submitting ? 'Scoring…' : 'Score';
+}
+
+function disableFormWithMessage(
+  els: {
+    submitBtn: HTMLButtonElement;
+    input: HTMLInputElement;
+    statusEl: HTMLParagraphElement;
+  },
+  message: string,
+): void {
+  els.input.disabled = true;
+  els.submitBtn.disabled = true;
+  renderInlineError(els.statusEl, message);
+}
+
+function networkErrorMessage(err: unknown): string {
+  if (err instanceof TypeError) return 'Network error. Check your connection and try again.';
+  return 'Scoring failed. Please try again.';
+}
+
+function escapeHtml(s: string): string {
+  return s
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&#39;');
+}
diff --git a/src/data/coverage-matrix.json b/src/data/coverage-matrix.json
index a51c9f4..64a66ad 100644
--- a/src/data/coverage-matrix.json
+++ b/src/data/coverage-matrix.json
@@ -99,7 +99,12 @@
       "applicability": {
         "kind": "universal"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p1-defaults-in-help",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p1-may-rich-tui",
@@ -109,7 +114,12 @@
       "applicability": {
         "kind": "universal"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p1-rich-tui",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p2-must-output-flag",
@@ -230,7 +240,12 @@
       "applicability": {
         "kind": "universal"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p2-more-formats",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p2-may-raw-flag",
@@ -240,7 +255,12 @@
       "applicability": {
         "kind": "universal"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p2-raw-flag",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p3-must-subcommand-examples",
@@ -268,6 +288,36 @@
         }
       ]
     },
+    {
+      "id": "p3-must-version",
+      "principle": 3,
+      "level": "must",
+      "summary": "Top-level `--version` prints a non-empty version line and exits 0.",
+      "applicability": {
+        "kind": "universal"
+      },
+      "verifiers": [
+        {
+          "check_id": "p3-version",
+          "layer": "behavioral"
+        }
+      ]
+    },
+    {
+      "id": "p3-should-version-short",
+      "principle": 3,
+      "level": "should",
+      "summary": "A short version alias (`-V`, `-v`, or `-version`) accompanies `--version` for fast version probes.",
+      "applicability": {
+        "kind": "universal"
+      },
+      "verifiers": [
+        {
+          "check_id": "p3-version",
+          "layer": "behavioral"
+        }
+      ]
+    },
     {
       "id": "p3-should-paired-examples",
       "principle": 3,
@@ -286,7 +336,12 @@
       "applicability": {
         "kind": "universal"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p3-about-long-about",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p3-may-examples-subcommand",
@@ -296,7 +351,12 @@
       "applicability": {
         "kind": "universal"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p3-examples-subcommand",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p4-must-try-parse",
@@ -585,7 +645,12 @@
         "kind": "conditional",
         "condition": "CLI has commands that accept input data"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p6-stdin-input",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p6-should-consistent-naming",
@@ -596,7 +661,12 @@
         "kind": "conditional",
         "condition": "CLI uses subcommands"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p6-consistent-naming",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p6-should-tier-gating",
@@ -627,7 +697,12 @@
       "applicability": {
         "kind": "universal"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p6-color-flag",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p6-may-standard-names",
@@ -684,7 +759,12 @@
       "applicability": {
         "kind": "universal"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p7-verbose",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p7-should-limit",
@@ -695,7 +775,12 @@
         "kind": "conditional",
         "condition": "CLI has list-style commands"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p7-limit",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p7-should-timeout",
@@ -705,7 +790,12 @@
       "applicability": {
         "kind": "universal"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p7-timeout-behavioral",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p7-may-cursor-pagination",
@@ -716,7 +806,12 @@
         "kind": "conditional",
         "condition": "CLI returns paginated results"
       },
-      "verifiers": []
+      "verifiers": [
+        {
+          "check_id": "p7-cursor-pagination",
+          "layer": "behavioral"
+        }
+      ]
     },
     {
       "id": "p7-may-auto-verbosity",
@@ -753,6 +848,10 @@
         "kind": "universal"
       },
       "verifiers": [
+        {
+          "check_id": "p6-agents-md",
+          "layer": "project"
+        },
         {
           "check_id": "p8-bundle-exists",
           "layer": "project"
@@ -793,21 +892,21 @@
     }
   ],
   "summary": {
-    "total": 57,
-    "covered": 30,
-    "uncovered": 27,
-    "dual_layer": 9,
+    "total": 59,
+    "covered": 45,
+    "uncovered": 14,
+    "dual_layer": 10,
     "must": {
-      "total": 27,
-      "covered": 21
+      "total": 28,
+      "covered": 22
     },
     "should": {
-      "total": 20,
-      "covered": 6
+      "total": 21,
+      "covered": 14
     },
     "may": {
       "total": 10,
-      "covered": 3
+      "covered": 9
     }
   },
   "audit_profiles": [
diff --git a/src/shared/scorecard-format.d.ts b/src/shared/scorecard-format.d.ts
new file mode 100644
index 0000000..8d7eb42
--- /dev/null
+++ b/src/shared/scorecard-format.d.ts
@@ -0,0 +1,33 @@
+// Type declarations for src/shared/scorecard-format.mjs.
+// Keeps the implementation in a single .mjs file (importable by both the
+// Node build and the Worker bundle) while giving the Worker's TypeScript
+// callers proper type checking. Pair this with the .mjs implementation —
+// changes to one need a mirroring change to the other.
+
+export type CheckResultLike = {
+  status: 'pass' | 'fail' | 'warn' | 'skip' | string;
+  label: string;
+  group: string;
+  evidence: string | null;
+};
+
+export type ScorecardLike = {
+  results?: CheckResultLike[];
+};
+
+export function escHtml(s: unknown): string;
+
+export const PRINCIPLE_NAMES: Record<string, string>;
+export const PRINCIPLE_GROUPS: string[];
+export const BONUS_GROUPS: string[];
+
+export function groupToPrincipleNum(group: string): number | null;
+
+export function extractTopIssues<T extends CheckResultLike = CheckResultLike>(
+  scorecard: { results?: T[] } | null | undefined,
+  limit?: number,
+): T[];
+
+export function formatCheckRowMarkdown(check: CheckResultLike, opts?: { baseUrl?: string }): string;
+
+export function formatCheckTableMarkdownLines(checks: CheckResultLike[], opts?: { baseUrl?: string }): string[];
diff --git a/src/shared/scorecard-format.mjs b/src/shared/scorecard-format.mjs
new file mode 100644
index 0000000..0e6063b
--- /dev/null
+++ b/src/shared/scorecard-format.mjs
@@ -0,0 +1,123 @@
+// Worker-safe shared primitives used by BOTH the build (scorecards-render.mjs,
+// runs in Node) AND the Worker (src/worker/score/summary-render.ts, runs in
+// the Cloudflare runtime).
+//
+// Single source of truth for:
+//   - HTML escape (escHtml)
+//   - Principle name + group constants (PRINCIPLE_NAMES, PRINCIPLE_GROUPS, BONUS_GROUPS)
+//   - groupToPrincipleNum derivation
+//   - topIssues extractor (FAIL > WARN, capped)
+//   - The shared markdown-summary builder used by /live-score/<binary>.md and
+//     the head of the static /score/<tool>.md page
+//
+// Pure module — no Node imports, no fs reads, no `process.env`. Lives under
+// `src/shared/` so the dependency direction is obvious: build code and worker
+// code both depend on `shared/`, never the other way around.
+
+/**
+ * Escape HTML special characters. Used at every server→client boundary that
+ * embeds scorecard fields (some of which come from CLI evidence strings the
+ * tool author wrote in their --help output).
+ *
+ * @param {string} s
+ * @returns {string}
+ */
+export function escHtml(s) {
+  return String(s).replace(
+    /[<>&"']/g,
+    (c) => ({ '<': '&lt;', '>': '&gt;', '&': '&amp;', '"': '&quot;', "'": '&#39;' })[c],
+  );
+}
+
+/** Map of principle group code → human-readable name. */
+export const PRINCIPLE_NAMES = {
+  P1: 'Non-Interactive by Default',
+  P2: 'Structured, Parseable Output',
+  P3: 'Progressive Help Discovery',
+  P4: 'Fail-Fast, Actionable Errors',
+  P5: 'Safe Retries & Mutation Boundaries',
+  P6: 'Composable, Predictable Command Structure',
+  P7: 'Bounded, High-Signal Responses',
+};
+
+export const PRINCIPLE_GROUPS = Object.keys(PRINCIPLE_NAMES);
+
+export const BONUS_GROUPS = ['CodeQuality', 'ProjectStructure'];
+
+/**
+ * Map a check group string like "P3" to a principle number (3), or null
+ * for bonus groups (CodeQuality / ProjectStructure).
+ *
+ * @param {string} group
+ * @returns {number | null}
+ */
+export function groupToPrincipleNum(group) {
+  const m = group.match(/^P(\d+)$/);
+  return m ? Number(m[1]) : null;
+}
+
+/**
+ * Extract the top failing/warning checks from a scorecard, FAIL before WARN.
+ * Used by both the build (per-tool page top-issues block) and the Worker
+ * (live-score summary top-issues block).
+ *
+ * @template {{ status: string; label: string; group: string; evidence: string | null }} T
+ * @param {{ results?: T[] }} scorecard
+ * @param {number} limit
+ * @returns {T[]}
+ */
+export function extractTopIssues(scorecard, limit = 3) {
+  if (!scorecard || !Array.isArray(scorecard.results)) return [];
+  const issues = scorecard.results.filter((r) => r.status === 'fail' || r.status === 'warn');
+  const order = { fail: 0, warn: 1 };
+  issues.sort((a, b) => (order[a.status] ?? 9) - (order[b.status] ?? 9));
+  return issues.slice(0, limit);
+}
+
+/**
+ * Format a single check as a markdown table row. Both the static
+ * `/score/<tool>.md` (full check table) and the live `/live-score/<binary>.md`
+ * (top-3 issues table) emit the same row shape, so this is the single
+ * source of truth.
+ *
+ * Principle group codes (`P1..P7`) link to the principle page; bonus
+ * groups (`CodeQuality`, `ProjectStructure`) stay as plain text. Evidence
+ * and label strings have `|` escaped so user-controlled evidence with
+ * pipes (shell pipelines, table syntax) doesn't fracture the table.
+ *
+ * Links use a site-relative path by default. Callers serving markdown
+ * twins that may be fetched cross-origin can pass an absolute baseUrl
+ * (e.g., `https://anc.dev`); absolutifyMarkdownLinks does the same
+ * rewrite for site-relative `(/path)` links after the fact, so either
+ * call style produces a self-resolving twin.
+ *
+ * @param {{ status: string; label: string; group: string; evidence: string | null }} check
+ * @param {{ baseUrl?: string }} [opts]
+ * @returns {string}
+ */
+export function formatCheckRowMarkdown(check, opts = {}) {
+  const baseUrl = (opts.baseUrl ?? '').replace(/\/$/, '');
+  const pNum = groupToPrincipleNum(check.group);
+  const groupLabel = pNum ? `[${check.group}](${baseUrl}/p${pNum})` : check.group;
+  const evidence = (check.evidence ?? '').replaceAll('|', '\\|');
+  const label = check.label.replaceAll('|', '\\|');
+  return `| ${check.status.toUpperCase()} | ${label} | ${groupLabel} | ${evidence} |`;
+}
+
+/**
+ * Emit a complete markdown check table (header + rows). When `checks` is
+ * empty, returns an empty array so the caller can decide what to put in
+ * its place (e.g., a "no issues" message).
+ *
+ * @param {Array<{status:string,label:string,group:string,evidence:string|null}>} checks
+ * @param {{ baseUrl?: string }} [opts]
+ * @returns {string[]} markdown lines
+ */
+export function formatCheckTableMarkdownLines(checks, opts = {}) {
+  if (checks.length === 0) return [];
+  return [
+    '| Status | Check | Principle | Evidence |',
+    '|--------|-------|-----------|----------|',
+    ...checks.map((c) => formatCheckRowMarkdown(c, opts)),
+  ];
+}
diff --git a/src/styles/site.css b/src/styles/site.css
index 31fc22d..178e2f3 100644
--- a/src/styles/site.css
+++ b/src/styles/site.css
@@ -1,4 +1,4 @@
-/* site.css — additive layer on top of foundation.css (docs/DESIGN.md §4 + A2). */
+/* site.css — additive layer on top of foundation.css (docs/DESIGN.md §4). */
 
 @font-face {
   font-family: "Uncut Sans";
@@ -113,7 +113,7 @@ main .anchor-icon {
   vertical-align: -2px;
 }
 
-/* RFC-keyword color pairs (A7 colors live in foundation.css). */
+/* RFC-keyword color pairs (token definitions live in foundation.css). */
 .rfc-must {
   color: var(--must, #af2b25);
 }
@@ -152,7 +152,7 @@ main .anchor-icon {
   letter-spacing: var(--tracking-caps);
 }
 
-/* Shiki dual-theme CSS bridge (docs/DESIGN.md §4.6 A7).
+/* Shiki dual-theme CSS bridge (docs/DESIGN.md §4.6).
  * Scope `span` matching to Shiki's own syntax tokens (which live under
  * `<pre class="shiki"><code>…spans…</code></pre>`). The previous selector
  * `.shiki span` matched ANY descendant span, including the client-side-
@@ -221,11 +221,13 @@ main .anchor-icon {
 }
 .site-nav {
   display: flex;
-  gap: 1rem;
+  flex-wrap: wrap;
+  gap: 0.5rem 1rem;
   font-size: 0.95rem;
 }
 .site-nav a {
   color: inherit;
+  white-space: nowrap;
 }
 
 /* Theme toggle — hidden when JS is off (C6). */
@@ -447,6 +449,46 @@ main .anchor-icon {
   justify-content: center;
 }
 
+/* Source-of-truth row. Sits between the AI-summary block and the meta
+ * line, mirroring the meta line's centered-flex layout. Renders as
+ * "[GH] Source: spec · cli · site · skill"; the icon is a quiet kicker,
+ * not a primary call-out — sized smaller than the AI-summary icons and
+ * tinted to fg-muted with the same hover lift as the meta links. */
+.site-footer__source {
+  margin: 0 0 0.45rem;
+  display: flex;
+  flex-wrap: wrap;
+  align-items: center;
+  gap: 0.3rem;
+  justify-content: center;
+  font-size: 0.85rem;
+}
+.site-footer__source-icon {
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  width: 1rem;
+  height: 1rem;
+  color: var(--fg-muted, #525960);
+  margin-right: 0.15rem;
+}
+.site-footer__source-icon svg {
+  width: 100%;
+  height: 100%;
+  display: block;
+}
+.site-footer__source-label {
+  color: var(--fg-muted, #525960);
+  letter-spacing: 0.01em;
+}
+.site-footer__source a {
+  color: inherit;
+  transition: color 120ms ease;
+}
+.site-footer__source a:hover {
+  color: var(--fg-body, #1a2026);
+}
+
 /* AI summary CTA — provider icons above the meta line. */
 .ai-summary {
   display: flex;
@@ -1219,3 +1261,381 @@ body:has(.leaderboard-hero) main {
     transition-duration: 0.01ms;
   }
 }
+
+/* =============================================================
+ * Live-scoring form — homepage paste-input surface.
+ *
+ * Integrates with the principle-entry rhythm above the principles
+ * list: same `3rem 1fr` grid (kicker | content), same top-border
+ * separator, same numbered-prefix-in-accent-mono treatment. The
+ * kicker reads "Try" — telling the visitor this row is a
+ * pre-principles entry point, not principle 0.
+ *
+ * Restraint over decoration: no card grid, no bordered widget,
+ * no uppercase-tracked labels. The input IS the centerpiece — the
+ * Score button carries the only solid color in the form so the
+ * eye lands there without ambient ornament.
+ *
+ * Tokens: --accent for focus + submit + kicker, --bg-code for the
+ * input field, --border for the hairline separator, --must for
+ * error and bounce-panel keyword color. No side-stripe borders,
+ * no glassmorphism, no gradient text.
+ * ============================================================= */
+
+.live-score {
+  border-top: 1px solid var(--border-subtle, var(--border));
+  /* Match the principle-entry enter-stagger so the form lands as the
+   * first row of the homepage's content rhythm, not as a separate
+   * widget below the hero. */
+  animation: principle-enter 400ms cubic-bezier(0.16, 1, 0.3, 1) both;
+  animation-delay: 80ms;
+}
+
+.live-score__row {
+  display: grid;
+  grid-template-columns: 3rem 1fr;
+  grid-template-rows: auto;
+  column-gap: 1.25rem;
+  padding: 1.5rem 0.75rem 1.75rem;
+  margin: 0 -0.75rem;
+}
+
+.live-score__kicker {
+  font-family: var(--font-mono);
+  font-size: 1.4rem;
+  font-weight: 350;
+  color: var(--accent);
+  line-height: 1.15;
+  padding-top: 0.15rem;
+  font-feature-settings: var(--ff-tabular, "tnum" 1, "kern" 1);
+  /* "Try" sits where the principle numbers sit — same column, same
+   * font, same weight, same color. Visual continuity with the list
+   * below; no decoration needed. */
+}
+
+.live-score__content {
+  display: grid;
+  /* Generous gap between heading + lede; tighter cluster lower down
+   * (form input + chips). The varied rhythm is intentional. */
+  gap: 0.35rem;
+}
+
+.live-score__title {
+  margin: 0;
+  font-size: 1.08rem;
+  font-weight: 600;
+  color: var(--fg-heading);
+  line-height: 1.35;
+  letter-spacing: -0.005em;
+}
+
+.live-score__lede {
+  margin: 0 0 1rem;
+  font-size: 0.92rem;
+  color: var(--fg-secondary);
+  line-height: 1.5;
+  max-inline-size: 65ch;
+}
+
+.live-score__lede code {
+  font-family: var(--font-mono);
+  font-size: 0.92em;
+  background: var(--bg-code);
+  padding: 0 0.3em;
+  border-radius: 3px;
+}
+
+.live-score__form {
+  display: grid;
+  gap: 0.6rem;
+}
+
+.live-score__input-row {
+  display: grid;
+  grid-template-columns: 1fr auto;
+  gap: 0.5rem;
+  align-items: stretch;
+}
+
+.live-score__input {
+  font-family: var(--font-mono);
+  /* Fluid larger size so the input reads as the centerpiece — bigger
+   * than body type, smaller than h1. Matches the visual weight of
+   * the principle title that follows it. */
+  font-size: clamp(1rem, 0.95rem + 0.3vw, 1.125rem);
+  line-height: 1.4;
+  padding: 0.7rem 0.85rem;
+  background: var(--bg-code);
+  border: 1px solid var(--border);
+  border-radius: 4px;
+  color: var(--fg-body);
+  min-inline-size: 0;
+}
+
+.live-score__input::placeholder {
+  color: var(--fg-muted);
+}
+
+.live-score__input:focus {
+  outline: 2px solid var(--accent);
+  outline-offset: 2px;
+  border-color: var(--accent);
+}
+
+.live-score__input:disabled {
+  opacity: 0.6;
+  cursor: not-allowed;
+}
+
+.live-score__submit {
+  font-family: var(--font-sans);
+  /* Match the input's fluid size step so they read as a pair, not a
+   * size jump. */
+  font-size: clamp(1rem, 0.95rem + 0.3vw, 1.125rem);
+  font-weight: 600;
+  padding: 0.7rem 1.25rem;
+  background: var(--accent);
+  color: var(--bg);
+  border: 1px solid var(--accent);
+  border-radius: 4px;
+  cursor: pointer;
+  /* The only solid-color surface in the form. Restrained color
+   * strategy + one accent ≤10% works here because the form occupies
+   * less than 10% of the homepage real estate. */
+}
+
+.live-score__submit:hover:not(:disabled) {
+  filter: brightness(1.08);
+}
+
+.live-score__submit:focus-visible {
+  outline: 2px solid var(--accent);
+  outline-offset: 2px;
+}
+
+.live-score__submit:disabled {
+  opacity: 0.7;
+  cursor: progress;
+}
+
+/* Chips render as inline prose: "or try <code>ripgrep</code>, <code>...</code>".
+ * Buttons under the hood (keyboard + ARIA), but visually they look
+ * like clickable code spans, not action buttons. Differentiates from
+ * the submit button so the eye lands on Score, not the chips. */
+.live-score__help {
+  margin: 0.1rem 0 0;
+  font-size: 0.88rem;
+  color: var(--fg-secondary);
+  line-height: 1.6;
+}
+
+.live-score__chip {
+  display: inline;
+  font: inherit;
+  background: transparent;
+  border: 0;
+  padding: 0;
+  color: var(--accent);
+  cursor: pointer;
+  text-decoration-line: underline;
+  text-decoration-style: dotted;
+  text-decoration-color: color-mix(in oklch, var(--accent) 35%, transparent);
+  text-underline-offset: 0.18em;
+}
+
+.live-score__chip code {
+  font-family: var(--font-mono);
+  font-size: 0.88em;
+  background: transparent;
+  padding: 0;
+  border-radius: 0;
+  color: inherit;
+}
+
+.live-score__chip:hover {
+  text-decoration-style: solid;
+  text-decoration-color: var(--accent);
+}
+
+.live-score__chip:focus-visible {
+  outline: 2px solid var(--accent);
+  outline-offset: 3px;
+  border-radius: 2px;
+}
+
+/* Single-line status — JS swaps in prose like "Resolving install
+ * path..." during a live run. Replaces the 4-tile progress grid
+ * (slop: identical card grid). No fake animation; the 2 s theater
+ * floor itself is the wait signal. */
+.live-score__status {
+  margin: 0.6rem 0 0;
+  font-size: 0.92rem;
+  color: var(--fg-body);
+  /* Inherits monospace from code descendants when JS injects bounce
+   * panels; for plain status text the sans body font is fine. */
+}
+
+.live-score__status--error {
+  color: var(--must);
+}
+
+/* Curated-hit reward: the user pasted a tool we've already audited.
+ * Identity-color the status text via --accent so the moment lands as
+ * "you found one of ours" without a banner, badge, or animation. The
+ * reward is visible for the remainder of the 2 s theater floor before
+ * the redirect to /score/<slug>; mid-dot separators in the copy mirror
+ * the footer rhythm. */
+.live-score__status--curated {
+  color: var(--accent);
+  font-weight: 600;
+  font-feature-settings: var(--ff-tabular, "tnum" 1, "kern" 1);
+}
+
+.live-score__status--bounce .live-score__bounce-headline {
+  margin: 0 0 0.4rem;
+  font-size: 1rem;
+  font-weight: 600;
+  color: var(--must);
+}
+
+.live-score__status--bounce .live-score__bounce-body {
+  margin: 0 0 0.6rem;
+  color: var(--fg-body);
+  font-size: 0.92rem;
+  line-height: 1.5;
+}
+
+.live-score__status--bounce .live-score__bounce-body code {
+  font-family: var(--font-mono);
+  font-size: 0.92em;
+  background: var(--bg-code);
+  padding: 0 0.3em;
+  border-radius: 3px;
+}
+
+.live-score__status--bounce .live-score__bounce-stderr {
+  margin: 0 0 0.6rem;
+  max-block-size: 16rem;
+  overflow: auto;
+  font-size: 0.82rem;
+  padding: 0.6rem 0.8rem;
+  background: var(--bg-code);
+  border: 1px solid var(--border);
+  border-radius: 4px;
+}
+
+/* Narrow viewports: collapse the principle-rhythm grid so the kicker
+ * sits inline with the content. Avoids the awkward 3rem orphan column
+ * on phones. (max-width vs max-inline-size: the latter is CSS MQ L5 and
+ * not yet broadly supported / not yet in biome's recognized spec list.
+ * max-width is functionally equivalent for this LTR site.) */
+@media (max-width: 36rem) {
+  .live-score__row {
+    grid-template-columns: 1fr;
+    row-gap: 0.4rem;
+    padding: 1.25rem 0.75rem 1.5rem;
+  }
+  .live-score__kicker {
+    font-size: 1rem;
+    line-height: 1.4;
+    padding-top: 0;
+  }
+  .live-score__input-row {
+    grid-template-columns: 1fr;
+    gap: 0.5rem;
+  }
+  .live-score__submit {
+    /* Full-width on narrow viewports — the input and button stack and
+     * each spans the full content column. */
+    justify-self: stretch;
+  }
+}
+
+/* =============================================================
+ * /score/live/<binary> result page — share surface.
+ * Reuses the per-tool scorecard score badge component; adds a
+ * compact header with version + binary + freshness marker.
+ * ============================================================= */
+
+.live-score-summary__header {
+  margin-block-end: var(--space-5);
+}
+
+.live-score-summary__header h1 {
+  margin: 0;
+  font-size: var(--text-h1);
+  letter-spacing: -0.01em;
+}
+
+.live-score-summary__version {
+  font-family: var(--font-mono);
+  font-size: 0.72em;
+  font-weight: 400;
+  color: var(--fg-secondary);
+  margin-inline-start: var(--space-3);
+}
+
+.live-score-summary__meta {
+  margin-block: var(--space-3) 0;
+  color: var(--fg-secondary);
+  font-size: var(--text-secondary);
+}
+
+.live-score-summary__meta code {
+  font-family: var(--font-mono);
+  font-size: 0.92em;
+  background: var(--bg-code);
+  padding: 0 0.35em;
+  border-radius: 3px;
+}
+
+.live-score-summary__freshness {
+  display: inline-block;
+  padding: 0.1rem 0.5rem;
+  margin-inline-start: var(--space-2);
+  font-family: var(--font-mono);
+  font-size: 0.72rem;
+  text-transform: lowercase;
+  background: var(--bg-code);
+  border: 1px solid var(--border);
+  border-radius: 3px;
+  color: var(--fg-muted);
+}
+
+.live-score-summary__freshness--live {
+  color: var(--may);
+  border-color: var(--may);
+}
+
+.live-score-summary__score {
+  margin-block: var(--space-5);
+}
+
+.live-score-summary__issues {
+  margin-block: var(--space-5);
+}
+
+.live-score-summary__issues h2 {
+  margin: 0 0 var(--space-3);
+  font-size: var(--text-h3);
+}
+
+.live-score-summary__cta {
+  margin-block: var(--space-6) var(--space-5);
+  padding-block-start: var(--space-5);
+  border-top: 1px solid var(--border);
+}
+
+.live-score-summary__cta h2 {
+  margin: 0 0 var(--space-3);
+  font-size: var(--text-h3);
+}
+
+.live-score-summary__cta p {
+  color: var(--fg-secondary);
+  max-inline-size: var(--measure);
+}
+
+.live-score-summary__cta-aside {
+  font-size: var(--text-secondary);
+}
diff --git a/src/worker-configuration.d.ts b/src/worker-configuration.d.ts
index e112fc2..a3ca777 100644
--- a/src/worker-configuration.d.ts
+++ b/src/worker-configuration.d.ts
@@ -1,5 +1,5 @@
 /* eslint-disable */
-// Generated by Wrangler by running `wrangler types ./src/worker-configuration.d.ts` (hash: 07037f49006dd1b2ea4ee4db54b50ac3)
+// Generated by Wrangler by running `wrangler types ./src/worker-configuration.d.ts` (hash: 79b542568ea50d642bb557be213bd40f)
 // Runtime types generated with workerd@1.20260405.1 2026-04-01 nodejs_compat
 declare namespace Cloudflare {
 	interface GlobalProps {
@@ -7,19 +7,35 @@ declare namespace Cloudflare {
 		durableNamespaces: "Sandbox";
 	}
 	interface StagingEnv {
+		SCORE_KV: KVNamespace;
 		SCORE_CACHE: R2Bucket;
 		SCORE_LIMITER: RateLimit;
+		SCORE_LIMITER_IP: RateLimit;
 		ASSETS: Fetcher;
+		TURNSTILE_SITEKEY: "1x00000000000000000000AA";
+		TURNSTILE_SECRET: string;
+		SESSION_HMAC_SECRET: string;
 		SCORE: DurableObjectNamespace<import("./worker/index").Sandbox>;
 	}
 	interface Env {
+		SCORE_KV: KVNamespace;
 		SCORE_CACHE: R2Bucket;
 		SCORE_LIMITER: RateLimit;
+		SCORE_LIMITER_IP: RateLimit;
 		ASSETS: Fetcher;
+		TURNSTILE_SITEKEY?: "1x00000000000000000000AA";
+		TURNSTILE_SECRET: string;
+		SESSION_HMAC_SECRET: string;
 		SCORE: DurableObjectNamespace<import("./worker/index").Sandbox>;
 	}
 }
 interface Env extends Cloudflare.Env {}
+type StringifyValues<EnvType extends Record<string, unknown>> = {
+	[Binding in keyof EnvType]: EnvType[Binding] extends string ? EnvType[Binding] : string;
+};
+declare namespace NodeJS {
+	interface ProcessEnv extends StringifyValues<Pick<Cloudflare.Env, "TURNSTILE_SITEKEY" | "TURNSTILE_SECRET" | "SESSION_HMAC_SECRET">> {}
+}
 
 // Begin runtime types
 /*! *****************************************************************************
diff --git a/src/worker/accept.ts b/src/worker/accept.ts
index 4c76ea6..1be8d8f 100644
--- a/src/worker/accept.ts
+++ b/src/worker/accept.ts
@@ -1,16 +1,27 @@
-// Content-negotiation helper — returns whichever of 'html' | 'markdown' the
-// caller prefers, using RFC 7231 q-value parsing via the `accepts` npm
-// package. Falls back to 'html' on absent, malformed, or non-matching Accept
-// headers (html is the citation default; markdown is opt-in).
+// Content-negotiation helpers — use RFC 7231 q-value parsing via the
+// `accepts` npm package (NOT substring matching, per the
+// `accept-header-q-value` learning).
 //
-// See docs/DESIGN.md §3.4 (Worker paragraph) + eng review A3. Test matrix lives
-// in tests/worker.test.ts.
+// detectPreference — site-default ('html' | 'markdown'). Used by index.ts
+//                    for the asset-first path; markdown is opt-in.
+//
+// detectScorePreference — /api/score endpoint ('json' | 'markdown'). JSON is
+//                         default; markdown is opt-in. The handler combines
+//                         this with URL-suffix detection
+//                         (`/api/score.md`, `/api/score.json`) in
+//                         `score/content-negotiation.ts`.
+//
+// See docs/DESIGN.md §3.4 (Worker paragraph) + eng review A3. Site-side
+// test matrix lives in tests/worker.test.ts; /api/score q-value tests live
+// in the same file's /api/score describe block.
 
 import accepts from 'accepts';
 
 export type Preference = 'html' | 'markdown';
+export type ScorePreference = 'json' | 'markdown';
 
-const PREFERENCE_ORDER = ['text/html', 'text/markdown'];
+const SITE_PREFERENCE_ORDER = ['text/html', 'text/markdown'];
+const SCORE_PREFERENCE_ORDER = ['application/json', 'text/markdown', 'text/html'];
 
 /**
  * Shim a Workers `Request` into the shape `accepts` expects: it only reads
@@ -27,6 +38,12 @@ function shim(request: Request) {
 export function detectPreference(request: Request): Preference {
   // @ts-expect-error — the accepts package types an IncomingMessage but only
   // reads `headers.accept`; the shim is sufficient.
-  const match = accepts(shim(request)).type(PREFERENCE_ORDER);
+  const match = accepts(shim(request)).type(SITE_PREFERENCE_ORDER);
   return match === 'text/markdown' ? 'markdown' : 'html';
 }
+
+export function detectScorePreference(request: Request): ScorePreference {
+  // @ts-expect-error — see detectPreference above.
+  const match = accepts(shim(request)).type(SCORE_PREFERENCE_ORDER);
+  return match === 'text/markdown' ? 'markdown' : 'json';
+}
diff --git a/src/worker/headers.ts b/src/worker/headers.ts
index 3f69821..2d00ace 100644
--- a/src/worker/headers.ts
+++ b/src/worker/headers.ts
@@ -1,6 +1,6 @@
 // Response-header policy for the agentnative-site Worker.
 //
-// Contract (docs/DESIGN.md §3.4 + eng review A8, A10, A12, P4):
+// Contract (docs/DESIGN.md §3.4):
 //
 //   HTML responses         Link: </p<n>.md>; rel="alternate"; type="text/markdown"
 //                          X-Llms-Txt: /llms.txt
@@ -36,14 +36,48 @@
 //   Hashed assets          Cache-Control: public, max-age=31536000, immutable
 //   (/fonts/*, /og-image.png)
 //
-//   Staging guard (P4 +    X-Robots-Tag: noindex on every response whose
-//    locked decision #4)   Host ends with `.workers.dev`. Added LAST so it
+//   Staging guard          X-Robots-Tag: noindex on every response whose
+//                          Host ends with `.workers.dev`. Added LAST so it
 //                          composes with the markdown branch (both set
 //                          noindex; last write wins, same value either way).
 
 const SHORT_CACHE = 'public, max-age=300, s-maxage=86400, stale-while-revalidate=60';
 const IMMUTABLE_CACHE = 'public, max-age=31536000, immutable';
 
+// Content-Security-Policy for HTML responses. CSP is required to allow
+// Cloudflare Turnstile's invisible widget script + iframe + siteverify
+// XHR on the homepage form, while keeping the rest of the site locked
+// down. Three directives MUST include `challenges.cloudflare.com` or
+// Turnstile breaks silently:
+//   - script-src  (lazy-loaded api.js)
+//   - frame-src   (invisible widget iframe)
+//   - connect-src (token exchange XHR)
+//
+// `'unsafe-inline'` is required for:
+//   - script-src: shell.mjs inlines the theme-init bootstrap (`<script>${themeInit}</script>`)
+//                 so dark/light mode is set BEFORE first paint, no FOUC.
+//   - style-src:  Shiki emits inline `style="color: #..."` on every code-block
+//                 token (the dual-theme bridge in DESIGN.md §4.6 depends on it).
+//
+// img-src includes `data:` for inline SVG icons; font-src `'self'` because
+// the woff2 files self-host from /fonts/. base-uri + form-action + object-src
+// lock down classic exfil/click-jack vectors that no part of this site needs.
+//
+// Applied to every HTML response (not just /), so a CSP regression test
+// hitting any page surfaces drift on every directive.
+const CSP_HTML =
+  "default-src 'self'; " +
+  "script-src 'self' 'unsafe-inline' https://challenges.cloudflare.com; " +
+  'frame-src https://challenges.cloudflare.com; ' +
+  "connect-src 'self' https://challenges.cloudflare.com; " +
+  "img-src 'self' data:; " +
+  "style-src 'self' 'unsafe-inline'; " +
+  "font-src 'self'; " +
+  "base-uri 'self'; " +
+  "form-action 'self'; " +
+  "object-src 'none'; " +
+  "frame-ancestors 'self'";
+
 export interface ApplyHeadersOptions {
   request: Request;
   servedMarkdown: boolean;
@@ -101,6 +135,10 @@ export function applyHeaders(response: Response, opts: ApplyHeadersOptions): Res
     headers.set('Link', `<${markdownTwinFor(opts.pathname)}>; rel="alternate"; type="text/markdown"`);
     headers.set('X-Llms-Txt', '/llms.txt');
     headers.set('Cache-Control', SHORT_CACHE);
+    // CSP applies to HTML responses only — the markdown / JSON / SVG
+    // branches above MUST stay free of HTML-only directives like
+    // frame-ancestors (Cloudflare WAF flags inconsistent enforcement).
+    headers.set('Content-Security-Policy', CSP_HTML);
   }
 
   // Staging guard — three-line check per locked decision #4. Applied LAST so
diff --git a/src/worker/index.ts b/src/worker/index.ts
index 872339c..fed410c 100644
--- a/src/worker/index.ts
+++ b/src/worker/index.ts
@@ -3,7 +3,7 @@
 // suffix or Accept header) and we're serving an HTML path, rewrite the
 // asset lookup to the `.md` twin before fetching.
 //
-// Contract (docs/DESIGN.md §3.4 + eng review A3, A8, A12):
+// Contract (docs/DESIGN.md §3.4):
 //   - Assets served via env.ASSETS (Workers Static Assets product). Not KV,
 //     not R2, not kv-asset-handler.
 //   - CN branch: path ends with `.md` OR `Accepts(req).type(['text/html',
@@ -13,15 +13,43 @@
 
 import { detectPreference } from './accept';
 import { applyHeaders } from './headers';
+import { isScorePath } from './score/content-negotiation';
+import { handleScore, type ScoreEnv } from './score/handler';
+import { handleLiveScorePage, parseLiveScorePath } from './score/summary-render';
 
+// The CF Sandbox/Containers SDK looks up `ctx.exports.ContainerProxy` at
+// outbound-handler dispatch time and throws "ctx.exports.ContainerProxy
+// is undefined, export ContainerProxy from the containers package in
+// your worker entrypoint" if it's missing. Surfaces only at runtime on
+// the first DO fetch; wrangler dry-run, deploy, and the bun-test
+// `cloudflare:workers` shim all pass. Same class of failure as PR #94
+// (Sandbox `fetch()` missing) — documented in
+// docs/solutions/integration-issues/cloudflare-workers-do-mock-must-mirror-binding-shape-2026-05-15.md.
+export { ContainerProxy } from '@cloudflare/sandbox';
 // Live-scoring DO class. Re-exported so wrangler's binding resolver can
 // find `class_name: "Sandbox"` from wrangler.jsonc's containers +
-// durable_objects sections. Stub until U6 lands the install + score
-// implementation.
+// durable_objects sections.
 export { Sandbox } from './score/do';
 
+// At runtime wrangler injects every binding declared in wrangler.jsonc
+// (ASSETS plus the SCORE_* set used by /api/score). The Env interface is
+// kept narrow so tests that exercise only the asset-first path can stub
+// a minimal env. The /api/score branch casts to ScoreEnv at dispatch
+// time, which is sound because wrangler always populates the full set.
 export interface Env {
   ASSETS: Fetcher;
+  SCORE?: DurableObjectNamespace;
+  SCORE_KV?: KVNamespace;
+  SCORE_LIMITER?: { limit(o: { key: string }): Promise<{ success: boolean }> };
+  SCORE_LIMITER_IP?: { limit(o: { key: string }): Promise<{ success: boolean }> };
+  // TURNSTILE_SECRET is a secret (wrangler secret put). TURNSTILE_SITEKEY
+  // is a public var the homepage form bakes into the widget render — set
+  // in env.staging only while production stays gated. Absent on
+  // production means the homepage form refuses to render Turnstile,
+  // which is the deliberate fail-loud posture pre-promotion.
+  TURNSTILE_SECRET?: string;
+  TURNSTILE_SITEKEY?: string;
+  SESSION_HMAC_SECRET?: string;
 }
 
 function rewriteToMarkdown(url: URL): URL {
@@ -39,6 +67,48 @@ export default {
     const url = new URL(request.url);
     const pathname = url.pathname;
 
+    // Live-scoring routes. Sits ABOVE the asset call so the asset-first
+    // invariant for everything else (every other path proxies to
+    // env.ASSETS) is preserved by exclusion, not by overlap.
+    if (isScorePath(pathname)) {
+      return handleScore(request, env as ScoreEnv);
+    }
+
+    // /score/live/<binary>.html → 301 to /score/live/<binary>. Mirrors
+    // the rest of the site (static `/score/<tool>.html` is canonicalized
+    // away from the .html extension by CF Static Assets'
+    // html_handling=auto-trailing-slash); the /score/live/ route is
+    // Worker-served so the same redirect is explicit here.
+    const liveScoreHtmlMatch = pathname.match(/^\/score\/live\/([a-z0-9][a-z0-9-]{0,63})\.html$/);
+    if (liveScoreHtmlMatch) {
+      const canonical = `/score/live/${liveScoreHtmlMatch[1]}`;
+      return new Response(null, {
+        status: 301,
+        headers: { Location: canonical, 'Cache-Control': 'public, max-age=300' },
+      });
+    }
+
+    // Shareable live-score result page. Reads the cached scorecard from
+    // R2 by binary slug, renders an HTML summary view.
+    // Strict regex enforced by parseLiveScorePath — slugs must match
+    // /^[a-z0-9][a-z0-9-]{0,63}$/, so an attacker can't pivot this
+    // route into an arbitrary R2 key read. Accepts both /score/live/<binary>
+    // and /score/live/<binary>.md (markdown twin) per the site-wide
+    // twin invariant. The "live" segment is reserved as a registry name
+    // (scorecards.mjs) so no curated tool can collide with this route.
+    if (parseLiveScorePath(pathname)) {
+      return handleLiveScorePage(request, env as ScoreEnv);
+    }
+
+    // /_internal/* paths are build-only assets (shell templates the
+    // Worker fetches via env.ASSETS internally). Return 404 here so
+    // direct user navigation never sees the raw template with `{{...}}`
+    // placeholders. The Worker's internal fetch goes straight to
+    // env.ASSETS.fetch and bypasses this interceptor.
+    if (pathname.startsWith('/_internal/')) {
+      return new Response('not found', { status: 404, headers: { 'content-type': 'text/plain' } });
+    }
+
     const pathIsMarkdown = pathname.endsWith('.md');
     const pathIsJson = pathname.endsWith('.json');
     // CN rewrite is markdown-only. Skip for `.json` paths so `Accept:
@@ -56,6 +126,28 @@ export default {
     }
 
     const upstream = await env.ASSETS.fetch(assetRequest);
+
+    // Homepage HTML: substitute {{TURNSTILE_SITEKEY}} placeholder. Runs
+    // AFTER the markdown-CN rewrite above so /index.md content (no
+    // placeholder) flows through untouched. Production with no
+    // TURNSTILE_SITEKEY set substitutes with the empty string, which the
+    // homepage JS treats as "form disabled, install anc locally" per
+    // the deliberate fail-loud-pre-promotion posture.
+    if ((pathname === '/' || pathname === '/index.html') && !servedMarkdown && upstream.ok) {
+      const contentType = upstream.headers.get('content-type') ?? '';
+      if (contentType.toLowerCase().includes('text/html')) {
+        const html = await upstream.text();
+        const sitekey = env.TURNSTILE_SITEKEY ?? '';
+        const substituted = html.replaceAll('{{TURNSTILE_SITEKEY}}', sitekey);
+        const rewritten = new Response(substituted, {
+          status: upstream.status,
+          statusText: upstream.statusText,
+          headers: upstream.headers,
+        });
+        return applyHeaders(rewritten, { request, servedMarkdown, pathname });
+      }
+    }
+
     return applyHeaders(upstream, { request, servedMarkdown, pathname });
   },
 } satisfies ExportedHandler<Env>;
diff --git a/src/worker/score/cache.ts b/src/worker/score/cache.ts
new file mode 100644
index 0000000..06e458d
--- /dev/null
+++ b/src/worker/score/cache.ts
@@ -0,0 +1,135 @@
+// R2 read/write wrapper for live-scoring scorecards.
+//
+// Plan U7 (docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md
+// lines 1994-2123). Single source of truth for the cache key shape so
+// reads and writes can't drift.
+//
+// Cache key: `scores/{binary}/{anc-version}.json`. The {anc-version} slot
+// is filled with the build-time `SPEC_VERSION` constant at launch
+// (handoff Decision 2 + gotcha 3, .context/handoffs/2026-05-19-001):
+// computing the running anc binary's version requires installing it
+// first, which defeats the cache. Spec bumps already mean an anc bump in
+// practice, so SPEC_VERSION-as-proxy carries the "anc bump invalidates"
+// property at the cost of caching across anc-only bumps that don't bump
+// the spec. The 7-day R2 lifecycle reaps the entry on the long tail.
+//
+// Refusal-to-cache-half-state: put() throws if `ancVersion` or
+// `toolVersion` is empty. The cached payload IS the contract; a partial
+// entry would silently degrade future cache reads.
+//
+// Write failures are best-effort: logged, never thrown to the caller.
+// One missed cache write costs at most one extra sandbox spawn the
+// next time; throwing would cost the user the response they came for.
+
+export type CacheEnv = { SCORE_CACHE: R2Bucket };
+
+export type CachedScorecard = {
+  spec_version: string;
+  anc_version: string;
+  tool_version: string;
+  scorecard: unknown;
+};
+
+// Per-write Cache-Control header. Keeps CDN edges from over-caching the
+// R2 object outside the Worker's view. R2 bucket lifecycle handles the
+// 7-day origin TTL — configured once via:
+//
+//   wrangler r2 bucket lifecycle add anc-score-cache --prefix scores/ --expiration 7d
+//
+// Documented under RELEASES.md "Sandbox image releases" so a future
+// bucket recreate doesn't lose the TTL.
+const CACHE_CONTROL = 'public, max-age=300, s-maxage=300';
+
+export function keyFor(binary: string, ancVersion: string): string {
+  return `scores/${binary}/${ancVersion}.json`;
+}
+
+export async function get(env: CacheEnv, key: string): Promise<CachedScorecard | null> {
+  // R2's `get(key)` returns an `R2ObjectBody | null`; the body is
+  // consumed via `.json()` / `.text()` / etc. This differs from KV's
+  // `get(key, "json")` shape — historically a footgun when porting
+  // helpers between the two binding types.
+  let obj: R2ObjectBody | null;
+  try {
+    obj = await env.SCORE_CACHE.get(key);
+  } catch (err) {
+    // R2 read failure: treat as miss + log. Never throw — the live path
+    // can still produce a result for the user.
+    console.log(JSON.stringify({ scope: 'cache.get', key, error: errMsg(err) }));
+    return null;
+  }
+  if (obj === null) return null;
+
+  let raw: unknown;
+  try {
+    raw = await obj.json();
+  } catch (err) {
+    // Malformed JSON body: treat as corrupted + best-effort delete.
+    console.log(JSON.stringify({ scope: 'cache.get', key, error: `json_parse: ${errMsg(err)}` }));
+    env.SCORE_CACHE.delete(key).catch(() => {
+      // delete failed — entry will age out via the 7-day R2 lifecycle.
+    });
+    return null;
+  }
+
+  if (!isCachedScorecard(raw)) {
+    // Schema-corrupted entry: log, best-effort delete, treat as miss. A
+    // future request will recompute and overwrite.
+    console.log(JSON.stringify({ scope: 'cache.get', key, error: 'corrupted_payload' }));
+    env.SCORE_CACHE.delete(key).catch(() => {
+      // delete failed — entry will age out via the 7-day R2 lifecycle.
+    });
+    return null;
+  }
+  return raw;
+}
+
+export async function put(
+  env: CacheEnv,
+  key: string,
+  scorecard: unknown,
+  ancVersion: string,
+  toolVersion: string,
+  specVersion: string,
+): Promise<void> {
+  if (!ancVersion) throw new Error('cache.put: ancVersion required (refusal-to-cache-half-state)');
+  if (!toolVersion) throw new Error('cache.put: toolVersion required (refusal-to-cache-half-state)');
+  if (!specVersion) throw new Error('cache.put: specVersion required (refusal-to-cache-half-state)');
+
+  const payload: CachedScorecard = {
+    spec_version: specVersion,
+    anc_version: ancVersion,
+    tool_version: toolVersion,
+    scorecard,
+  };
+
+  try {
+    await env.SCORE_CACHE.put(key, JSON.stringify(payload), {
+      httpMetadata: {
+        contentType: 'application/json',
+        cacheControl: CACHE_CONTROL,
+      },
+    });
+  } catch (err) {
+    // Best-effort: a write failure does not block the user's response.
+    console.log(JSON.stringify({ scope: 'cache.put', key, error: errMsg(err) }));
+  }
+}
+
+function isCachedScorecard(value: unknown): value is CachedScorecard {
+  if (typeof value !== 'object' || value === null) return false;
+  const obj = value as Record<string, unknown>;
+  return (
+    typeof obj.spec_version === 'string' &&
+    obj.spec_version.length > 0 &&
+    typeof obj.anc_version === 'string' &&
+    obj.anc_version.length > 0 &&
+    typeof obj.tool_version === 'string' &&
+    obj.tool_version.length > 0 &&
+    'scorecard' in obj
+  );
+}
+
+function errMsg(err: unknown): string {
+  return err instanceof Error ? err.message : String(err);
+}
diff --git a/src/worker/score/content-negotiation.ts b/src/worker/score/content-negotiation.ts
new file mode 100644
index 0000000..f1c9e11
--- /dev/null
+++ b/src/worker/score/content-negotiation.ts
@@ -0,0 +1,31 @@
+// /api/score content negotiation. Combines URL-suffix detection
+// (`/api/score.md`, `/api/score.json`) with Accept-header q-value parsing
+// (`accept.ts: detectScorePreference`).
+//
+// Plan U5 (docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md):
+//
+//   .json suffix          → 'json'  (always; bypasses Accept, mirrors the
+//                                    triple-emit-content-negotiation pattern)
+//   .md suffix            → 'markdown'
+//   no suffix             → detectScorePreference(request)  (defaults 'json')
+//
+// `accept-header-q-value` learning: NEVER substring-match the Accept
+// header. The accepts package handles q-values, wildcards, and bad input
+// correctly; substring matching breaks on `Accept: text/markdown;q=0.1,
+// application/json;q=0.9`.
+
+import type { ScorePreference } from '../accept';
+import { detectScorePreference } from '../accept';
+
+export type { ScorePreference } from '../accept';
+
+/** True for the three /api/score path shapes the handler responds to. */
+export function isScorePath(pathname: string): boolean {
+  return pathname === '/api/score' || pathname === '/api/score.md' || pathname === '/api/score.json';
+}
+
+export function preferenceFor(pathname: string, request: Request): ScorePreference {
+  if (pathname.endsWith('.json')) return 'json';
+  if (pathname.endsWith('.md')) return 'markdown';
+  return detectScorePreference(request);
+}
diff --git a/src/worker/score/discover-binary.ts b/src/worker/score/discover-binary.ts
index 5392897..9dd0cdc 100644
--- a/src/worker/score/discover-binary.ts
+++ b/src/worker/score/discover-binary.ts
@@ -1,17 +1,13 @@
 // Live GitHub URL discovery chain. Called by the Worker when registry
 // lookup misses on a github-url input.
 //
-// Plan U4 (docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md
-// lines 1104-1156, with F1 tightening per gate findings).
-//
-// Step 0.5 — discovery-hints lookup (defense in depth; the orchestrator's
-//             registry-lookup also checks hints, but a future caller that
-//             skips registry-lookup still gets the hint short-circuit).
+// Step 0.5 — discovery-hints lookup (zero-cost, in-memory; runs first
+//             so a hint hit short-circuits the network fan-out).
 // Step 2   — GitHub Releases API (linux-x86_64 asset).
 // Step 3   — Parallel distribution lookup (brew/cargo/npm/pypi/go) with
-//             per-registry repository-field match + bin-target check
-//             per gate F1. Without these the chain produces wrong-answer
-//             failures via cross-registry name collisions.
+//             per-registry repository-field match + bin-target check.
+//             Without these the chain produces wrong-answer failures via
+//             cross-registry name collisions.
 // Step 4   — README first-fenced-block install-command parse, with
 //             package-name-matches-repo guard.
 //
@@ -19,18 +15,74 @@
 // only routes repo-root URLs into this module, never release-asset URLs.
 // If a future input shape needs direct-URL paste, that's a validate.ts
 // + this module change.
+//
+// Concurrency model (Fix 2): Steps 2, 3, and 4 fan out in parallel via
+// Promise.allSettled. The wall-clock cost of one slow upstream (e.g.
+// proxy.golang.org occasionally takes 3 s for a cache-cold lookup) no
+// longer blocks the rest. After fan-in, a priority order
+// (hint > release-asset > registry > README-parse) picks the winner.
+// When MULTIPLE sources resolve, the higher-priority spec wins AND the
+// disagreement surfaces as a `discovery_disagreement` event in the
+// returned diagnostics — telemetry for cases where (e.g.) brew formula
+// names binary X but the release artifact ships binary Y. Cross-source
+// AGREEMENT is also surfaced as a `discovery_agreement` event so
+// operations can spot high-confidence resolutions in the field. None of
+// these events affect the API response shape; they're observability for
+// future log queries / regression detection.
+//
+// Why parallel beats serial here: each upstream is a single round-trip
+// against a public registry index (release API, brew formula JSON,
+// crates.io crate JSON, npm registry, pypi JSON, go proxy, raw
+// github.com README). Each is cheap on its own (~100-300 ms warm,
+// occasionally up to 2 s). Serial fan-out makes the chain pay the sum
+// of all the latencies for misses that should bounce in max(latencies).
+// Parallel fan-out + priority pick also gives us cross-validation:
+// when two sources concur, our confidence is higher; when they
+// disagree, we'd rather see it in logs than silently degrade.
 
 import type { ParsedInstall } from './parse-install';
 import { parseInstallCommand } from './parse-install';
 import type { DiscoveryHintsIndex } from './registry-lookup';
 
 export type DirectInstall = { pm: 'direct'; url: string; binary: string };
-export type InstallSpec = ParsedInstall | DirectInstall;
+// Branch-scoped source clone. When a user pastes a github URL with a
+// `/tree/<branch>` path, the DO routes the request through this install
+// spec instead of the discovery chain: discovery targets release
+// artifacts (which are scored against the release, not a branch), so a
+// branch-scoped paste needs the source at THAT branch. The orchestration
+// in sandbox-exec.ts clones the repo at the specified branch with
+// `--depth 1` (shallow) and runs `anc check` against the cloned
+// directory rather than `anc check --command <binary>`.
+export type GitCloneInstall = {
+  pm: 'git-clone';
+  owner: string;
+  repo: string;
+  branch: string;
+  // The "binary" is the repo name by convention — used as the share-url
+  // slug and the cache key. Branch-scoped scores skip the cache write
+  // (handler.ts), so the binary here is purely a display label.
+  binary: string;
+};
+export type InstallSpec = ParsedInstall | DirectInstall | GitCloneInstall;
 
 export type DiscoveryResult =
-  | { ok: true; spec: InstallSpec; resolved_step: ResolvedStep }
+  | { ok: true; spec: InstallSpec; resolved_step: ResolvedStep; diagnostics?: DiscoveryDiagnostics }
   | { ok: false; error: 'chain_no_resolve'; exhausted: ExhaustedSteps };
 
+// Telemetry surface: agreement/disagreement across parallel-fan-out
+// steps. Not user-visible; populated for Workers Logs aggregation so we
+// can see when two registries disagree about a tool's install path.
+// `winners` is the resolved step that won the priority pick. `losers`
+// lists the steps that ALSO produced a hit but lost to priority.
+// `agreed_binary` is true iff every winning + losing source picked the
+// same install path (binary name match). False when (e.g.) brew formula
+// `foo` resolves to a different artifact than the release tarball.
+export type DiscoveryDiagnostics = {
+  winner: ResolvedStep;
+  losers: ResolvedStep[];
+  agreed_binary: boolean;
+};
+
 export type ResolvedStep =
   | '0.5-hints'
   | '2-releases-asset'
@@ -48,8 +100,19 @@ export type ExhaustedSteps = {
   readme: { hit: false; reason: string };
 };
 
-const LINUX_X64_ASSET_RE =
-  /(linux[-_]x86[-_]?64|x86[-_]64[-_]unknown[-_]linux|linux[-_]amd64|amd64[-_]linux|linux64|linux[-_]gnu|linux[-_]musl)/i;
+// Asset must satisfy BOTH conditions:
+//   1. Linux + x86_64/amd64 — the loose substring match below excludes
+//      aarch64 / armhf / i686 by REQUIRING an x86_64 / amd64 token AND a
+//      linux marker in the same name. The legacy regex matched
+//      `aarch64-unknown-linux-gnu` via the `linux-gnu` substring, which
+//      cross-architected installs onto our x86_64 sandbox.
+//   2. A real archive extension. .deb / .rpm / .sha256 / .pkg drop here
+//      because directInstallCommand only knows how to extract tar/zip.
+//      Before this filter, bat releases (which ship .deb files BEFORE
+//      .tar.gz files in the asset list) resolved to a .deb and failed
+//      with `gzip: stdin: not in gzip format`.
+const LINUX_X64_ASSET_RE = /(?=.*(?:x86[-_]?64|amd64))(?=.*linux)/i;
+const LINUX_X64_ARCHIVE_RE = /\.(?:tar\.gz|tar\.xz|tar\.bz2|tgz|txz|tbz2|zip)$/i;
 
 const INSTALL_CMD_RE =
   /^\s*\$?\s*(brew|cargo|bun|uv|pip|pip3|pipx|npm|yarn|pnpm|go)\s+(install|add|i|tool|global|binstall)/i;
@@ -72,7 +135,9 @@ export async function discoverBinary(ctx: DiscoverContext): Promise<DiscoveryRes
   const fetcher = ctx.fetcher ?? globalThis.fetch.bind(globalThis);
   const ownerRepo = `${ctx.owner}/${ctx.repo}`;
 
-  // Step 0.5 — hints
+  // Step 0.5 — hints. Zero-cost, in-memory lookup. Runs synchronously
+  // BEFORE the network fan-out so a hint hit short-circuits before we
+  // pay for the parallel network round-trips.
   const hint = lookupHint(ctx.hintsIndex, ownerRepo);
   if (hint) {
     return {
@@ -84,30 +149,67 @@ export async function discoverBinary(ctx: DiscoverContext): Promise<DiscoveryRes
 
   const deadline = Date.now() + TOTAL_TIMEOUT_MS;
 
-  // Step 2 — GitHub Releases asset
-  const releases = await step2_releasesAsset(ctx, fetcher, deadline);
+  // Parallel fan-out (Fix 2): Steps 2, 3, 4 fire concurrently. Each
+  // step carries its own internal timeout via the shared deadline so
+  // one slow upstream can't blow the total budget. allSettled keeps
+  // one step's rejection from cancelling the others.
+  const [releasesResult, distributionsResult, readmeResult] = await Promise.allSettled([
+    step2_releasesAsset(ctx, fetcher, deadline),
+    step3_distributions(ctx, fetcher, deadline),
+    step4_readmeParse(ctx, fetcher, deadline),
+  ]);
+
+  // Settled-to-value normalization. A rejected promise (network
+  // exception we didn't catch internally) becomes a synthetic "miss"
+  // so downstream priority logic doesn't have to wrangle the union
+  // type discriminant from allSettled.
+  const releases: Step2Hit | Step2Miss =
+    releasesResult.status === 'fulfilled' ? releasesResult.value : { hit: false, reason: 'fetch_threw' };
+  const distributions: Step3Hit | Step3Miss =
+    distributionsResult.status === 'fulfilled' ? distributionsResult.value : { hit: false, per_registry: {} };
+  const readme: Step4Hit | Step4Miss =
+    readmeResult.status === 'fulfilled' ? readmeResult.value : { hit: false, reason: 'fetch_threw' };
+
+  // Priority pick: release-asset > registry > README-parse. Collect
+  // every winning + losing step into the diagnostics record so
+  // disagreement is observable in logs without changing the API.
+  type Candidate = { step: ResolvedStep; spec: InstallSpec; binaryName: string };
+  const candidates: Candidate[] = [];
   if (releases.hit) {
-    return {
-      ok: true,
+    candidates.push({
+      step: '2-releases-asset',
+      // Binary name is the repo by default — Fix 1's auto-detect path
+      // in directInstallCommand corrects it post-extract if the
+      // archive ships a differently-named executable (gogcli → gog).
       spec: { pm: 'direct', url: releases.url, binary: ctx.repo },
-      resolved_step: '2-releases-asset',
-    };
+      binaryName: ctx.repo,
+    });
   }
-
-  // Step 3 — distributions (F1-tightened, parallel)
-  const distributions = await step3_distributions(ctx, fetcher, deadline);
   if (distributions.hit) {
-    return {
-      ok: true,
+    candidates.push({
+      step: distributions.step,
       spec: { pm: distributions.pm, package: ctx.repo, binary: ctx.repo },
-      resolved_step: distributions.step,
-    };
+      binaryName: ctx.repo,
+    });
   }
-
-  // Step 4 — README parse
-  const readme = await step4_readmeParse(ctx, fetcher, deadline);
   if (readme.hit) {
-    return { ok: true, spec: readme.spec, resolved_step: '4-readme-parse' };
+    candidates.push({
+      step: '4-readme-parse',
+      spec: readme.spec,
+      binaryName: readme.spec.binary,
+    });
+  }
+
+  if (candidates.length > 0) {
+    const winner = candidates[0];
+    const losers = candidates.slice(1).map((c) => c.step);
+    const agreed_binary = candidates.every((c) => c.binaryName === winner.binaryName);
+    return {
+      ok: true,
+      spec: winner.spec,
+      resolved_step: winner.step,
+      diagnostics: { winner: winner.step, losers, agreed_binary },
+    };
   }
 
   return {
@@ -115,9 +217,12 @@ export async function discoverBinary(ctx: DiscoverContext): Promise<DiscoveryRes
     error: 'chain_no_resolve',
     exhausted: {
       hints: { hit: false },
-      releases: { hit: false, reason: releases.reason },
-      distributions: { hit: false, per_registry: distributions.per_registry },
-      readme: { hit: false, reason: readme.reason },
+      releases: { hit: false, reason: releases.hit ? '' : (releases as Step2Miss).reason },
+      distributions: {
+        hit: false,
+        per_registry: distributions.hit ? {} : (distributions as Step3Miss).per_registry,
+      },
+      readme: { hit: false, reason: readme.hit ? '' : (readme as Step4Miss).reason },
     },
   };
 }
@@ -201,7 +306,7 @@ async function step2_releasesAsset(
   );
   if (!release) return { hit: false, reason: 'no_release_or_404' };
   const assets = Array.isArray(release.assets) ? release.assets : [];
-  const match = assets.find((a) => a.name && LINUX_X64_ASSET_RE.test(a.name));
+  const match = assets.find((a) => a.name && LINUX_X64_ASSET_RE.test(a.name) && LINUX_X64_ARCHIVE_RE.test(a.name));
   if (match?.browser_download_url) return { hit: true, url: match.browser_download_url };
   return { hit: false, reason: assets.length > 0 ? 'no_linux_x64_asset' : 'release_has_no_assets' };
 }
@@ -295,12 +400,21 @@ async function step3_distributions(
   const goLoose = !!goRes?.ok;
   const goTight = goLoose;
 
-  // Priority order matches the plan: brew -> crates -> npm -> pypi -> go.
-  if (brewTight) return { hit: true, pm: 'brew', step: '3-brew' };
+  // Priority order: sandbox-installable PMs first (crates / npm / pypi /
+  // go), brew last. Brew is unconditionally bounced as install_unsupported
+  // inside the sandbox image (Linuxbrew is non-viable on musl). If a tool
+  // has both a brew formula AND a working
+  // alternative (e.g. csvlens is in brew AND on crates.io), picking
+  // brew sends the user to a guaranteed bounce when scoring was
+  // possible. Brew is kept as the last resort so brew-only tools still
+  // bounce honestly rather than degrading to chain_no_resolve and
+  // hitting Step 4 README parse — the bounce message at least names
+  // the brew formula.
   if (cratesTight) return { hit: true, pm: 'cargo-binstall', step: '3-crates' };
   if (npmTight) return { hit: true, pm: 'npm', step: '3-npm' };
   if (pypiTight) return { hit: true, pm: 'pip', step: '3-pypi' };
   if (goTight) return { hit: true, pm: 'go', step: '3-go' };
+  if (brewTight) return { hit: true, pm: 'brew', step: '3-brew' };
 
   return {
     hit: false,
@@ -354,7 +468,9 @@ async function step4_readmeParse(
           if (parsed.ok) return { hit: true, spec: parsed.value };
         }
       }
-      // Per plan: only the first non-comment line of each fenced block.
+      // Only the first non-comment line of each fenced block — most
+      // READMEs lead with the canonical install command and follow with
+      // alternatives we'd otherwise mis-resolve to.
       break;
     }
   }
diff --git a/src/worker/score/do.ts b/src/worker/score/do.ts
index ae0825e..5663093 100644
--- a/src/worker/score/do.ts
+++ b/src/worker/score/do.ts
@@ -1,28 +1,289 @@
-// Stub Sandbox DO class for plan U3 wrangler binding registration.
-//
-// The full implementation (extends the Cloudflare Sandbox SDK, runs the
-// two-phase egress + install + anc check flow) lands in U6 with the
-// `@cloudflare/sandbox` import. Until then this exists ONLY to satisfy
-// `wrangler deploy --dry-run` — the Containers + DurableObjects bindings
-// in wrangler.jsonc reference `class_name: "Sandbox"` and wrangler
-// resolves that name by reading the Worker's main module exports.
-//
-// Uses the legacy class-form DO pattern (no `cloudflare:workers` import)
-// rather than `extends DurableObject` because Bun's test runtime can't
-// resolve the `cloudflare:workers` virtual module — it's a Workers
-// runtime-only entry that bundles in via the Worker build, not Bun's
-// package resolver. U6 will switch to `extends Sandbox` from
-// `@cloudflare/sandbox`, which IS bun-resolvable as a real npm package.
-//
-// Calling any RPC method before U6 lands returns a typed error so the
-// surfacing is loud rather than silent if something accidentally hits
-// the binding early (e.g. a misrouted handler, a leaked staging URL).
-
-export class Sandbox {
-  // biome-ignore lint/complexity/noUselessConstructor: stub signature mirrors the runtime DO contract that U6 will fill in
-  constructor(_state: DurableObjectState, _env: unknown) {}
-
-  async score(): Promise<{ error: string }> {
-    return { error: 'sandbox_stub_until_u6' };
+// Live-scoring Sandbox Durable Object — install + anc check inside an
+// Alpine + musl Container, with two-phase egress (R7) enforced via the
+// CF Sandbox SDK's named outbound handlers (Pattern Y). The class
+// extends `@cloudflare/sandbox` and inherits the runtime egress control
+// + container exec surface from `@cloudflare/containers`.
+//
+// 2026-05-20 discovery-move: the DO used to own the full
+// ValidatedInput → InstallSpec resolution (including the brew/go
+// fallbacks + the discoverBinary chain). That layer moved upstream to
+// the Worker (src/worker/score/resolve-spec.ts) so chain_no_resolve
+// requests bounce without spinning up a container. The DO's surface
+// now starts at "given an InstallSpec, install + score" — the
+// orchestration in sandbox-exec.ts is unchanged, but the request body
+// crossing the DO boundary is `{spec: InstallSpec, hash: string}`
+// instead of the pre-move `{input: ValidatedInput, hash: string}`.
+// `loadHintsIndex` is no longer needed here either (the Worker loads
+// hints once and threads them through resolveSpec).
+//
+// Test-mode importability:
+//
+//   `@cloudflare/containers` does a top-level `import { DurableObject }
+//   from 'cloudflare:workers'` (workerd virtual module). Bun's test
+//   runtime can't resolve `cloudflare:workers` natively; tests/bun-setup.ts
+//   registers a virtual-module shim so do.ts loads inside `bun test`
+//   without bringing in real DO state machinery. The shim provides no-op
+//   base classes — enough for `import { Sandbox } from '@cloudflare/sandbox'`
+//   to succeed at module load. Tests that exercise real DO behavior
+//   (state, alarms, container exec) require a workerd-backed runtime.
+
+import type { OutboundHandler } from '@cloudflare/containers';
+import { Sandbox as BaseSandbox } from '@cloudflare/sandbox';
+import { SPEC_VERSION } from '../spec-version.gen';
+import * as cache from './cache';
+import type { InstallSpec } from './discover-binary';
+import { score as runSandboxScore, type ScoreResult } from './sandbox-exec';
+
+// ---------------------------------------------------------------------------
+// Env contract
+// ---------------------------------------------------------------------------
+
+// Wrangler injects all Worker bindings into the DO's env at construction.
+// We declare only what this DO uses so tests can pass a minimal stub.
+// SCORE_CACHE is optional because the DO functions correctly without it
+// (the cache write is best-effort by design — failure logs but never
+// blocks the user response), and tests that exercise the install + score
+// flow without exercising the cache write don't need to stub it.
+//
+// ASSETS stays in the env shape because @cloudflare/sandbox + the
+// Worker binding plumbing inject it regardless; the DO no longer
+// uses it now that the hints index lives entirely in the Worker tier.
+export type ScoreSandboxEnv = {
+  ASSETS: Fetcher;
+  SCORE_CACHE?: R2Bucket;
+};
+
+// Request body the Worker sends to the DO after 2026-05-20:
+//
+//   stub.fetch(new Request('https://do.internal/score', {
+//     method: 'POST',
+//     body: JSON.stringify({ spec: InstallSpec, hash: string }),
+//   }))
+//
+// Pre-move shape was `{ input: ValidatedInput, hash }`; the rename to
+// `spec` is the signal that resolution has already happened upstream.
+// `hash` is unused in the install+score path today; it stays on the
+// wire for telemetry alignment with the Worker's per-request log line.
+export type ScoreRequestBody = {
+  spec: InstallSpec;
+  hash: string;
+};
+
+// ---------------------------------------------------------------------------
+// Outbound handlers (Pattern Y — named, runtime-swappable)
+//
+// Per-request egress observability is why we picked named handlers
+// (Pattern Y) over a static allowedHosts list: every outbound attempt
+// during install OR after the noHttp lockdown emits one structured log
+// line so attempted-but-blocked egress surfaces as a security signal in
+// Workers Logs.
+// ---------------------------------------------------------------------------
+
+type AllowedInstallParams = { allowedHostnames: string[] };
+
+// Match a hostname against an allowlist that supports leading-wildcard
+// entries (`*.githubusercontent.com` matches
+// `objects.githubusercontent.com`, `release-assets.githubusercontent.com`,
+// etc.). Exact matches still work without the wildcard. Kept
+// conservative: only `*.` prefix is supported (not arbitrary glob), and
+// the wildcard requires AT LEAST ONE subdomain label — bare apex hits
+// (`githubusercontent.com`) must be allowlisted explicitly to avoid
+// over-permissive matching when the apex domain has different trust
+// semantics from its CDN subdomains.
+function hostnameAllowed(host: string, allowlist: readonly string[]): boolean {
+  for (const entry of allowlist) {
+    if (entry === host) return true;
+    if (entry.startsWith('*.')) {
+      const suffix = entry.slice(1); // `.githubusercontent.com`
+      if (host.length > suffix.length && host.endsWith(suffix)) return true;
+    }
+  }
+  return false;
+}
+
+const allowedInstall: OutboundHandler<unknown, AllowedInstallParams> = async (req, _env, ctx) => {
+  const host = new URL(req.url).hostname;
+  const allowed = hostnameAllowed(host, ctx.params.allowedHostnames);
+  console.log(JSON.stringify({ phase: 'install', host, allowed }));
+  if (allowed) return fetch(req);
+  return new Response(null, { status: 403 });
+};
+
+const noHttp: OutboundHandler = async (req) => {
+  const host = new URL(req.url).hostname;
+  console.log(JSON.stringify({ phase: 'noHttp', host, blocked: true }));
+  return new Response(null, { status: 403 });
+};
+
+// Export the handler shapes so tests can call them as plain functions
+// without instantiating the DO class. Useful for the per-request log
+// shape assertion (test scenario (c)).
+export const handlers = { allowedInstall, noHttp };
+
+// ---------------------------------------------------------------------------
+// DO class
+// ---------------------------------------------------------------------------
+
+export class Sandbox extends BaseSandbox<ScoreSandboxEnv> {
+  // DIAGNOSTIC: HTTPS interception OFF to isolate whether the SDK's
+  // Worker-fetch passthrough is the cause of the upstream-403 regressions
+  // seen on staging after the Debian-slim rework. With interception off,
+  // container HTTPS bypasses allowedInstall + noHttp entirely; outbound
+  // hits upstream from the CF Container IP rather than the Worker fetch
+  // IP. Phase 2 lockdown is lost while this flag is false — must revert
+  // before merge.
+  override interceptHttps = false;
+
+  // Override BaseSandbox.fetch (which normally proxies to the container's
+  // HTTP listener) to dispatch the score endpoint instead. Our container
+  // is a compute substrate exposed via exec(), not an HTTP service.
+  override async fetch(request: Request): Promise<Response> {
+    if (request.method !== 'POST') {
+      return json({ error: 'method_not_allowed' }, 405);
+    }
+
+    let parsed: ScoreRequestBody;
+    try {
+      const body = (await request.json()) as ScoreRequestBody;
+      if (!body || typeof body !== 'object' || !body.spec) {
+        return json({ error: 'invalid_do_body' }, 400);
+      }
+      parsed = body;
+    } catch {
+      return json({ error: 'invalid_do_body' }, 400);
+    }
+
+    const result = await this.score(parsed.spec);
+    if (!result.ok) {
+      return json({ error: result.error, details: result.details }, statusFor(result.error));
+    }
+
+    // Write the successful scorecard to R2 so the next request for the
+    // same binary short-circuits at the handler's lookupScorecard cache
+    // tier. Best-effort: the cache helpers swallow R2 failures
+    // (logged, never thrown). The await delays the response by one R2
+    // round-trip (~30-100 ms typical); the latency cost is paid once per
+    // tool per anc bump and saves a full sandbox spawn (~3-20 s) on the
+    // next request. The trade is intentional and bounded.
+    //
+    // Branch-scoped clones skip the cache write: the cache key is
+    // `scores/<binary>/<spec-version>.json` which doesn't include the
+    // branch. Caching a branch-scored result would clobber the
+    // default-branch scorecard for any subsequent request that hits
+    // the same binary. Branch-scoring is intentionally one-off.
+    if (parsed.spec.pm !== 'git-clone') {
+      await writeCacheBestEffort(this.env, parsed.spec, result.value);
+    }
+
+    return json(result.value, 200);
+  }
+
+  // RPC entry point — used by tests that want to invoke the score flow
+  // without round-tripping a Request. Also makes the orchestration unit
+  // independently exercisable from a server-side caller (e.g. a future
+  // batch-scoring cron Worker).
+  async score(spec: InstallSpec): Promise<ScoreResult> {
+    return runSandboxScore(this, spec);
+  }
+}
+
+// Wire named handlers on the class. Done at module load so a wrangler
+// binding-resolution pass picks up the static map before any handler
+// invocation.
+Sandbox.outboundHandlers = { allowedInstall, noHttp };
+
+// ---------------------------------------------------------------------------
+// Cache write
+// ---------------------------------------------------------------------------
+
+// Best-effort R2 write after a successful score. Skipped (with a log) when
+// SCORE_CACHE isn't bound on the DO env, or when the scorecard doesn't
+// carry an extractable tool version (cache.put refuses half-state, so we
+// short-circuit at the surface to avoid the throw). All write paths
+// inside cache.put already swallow R2 failures — this wrapper handles
+// the precondition layer above that.
+//
+// Exported for unit tests (tests/score-do-cache-write.test.ts) since the
+// Sandbox class itself isn't directly instantiable under bun:test without
+// the workerd shim. The wrapper carries the full precondition + write
+// flow that fetch() invokes, so testing it directly pins the cache-write
+// contract without touching DO boilerplate.
+export async function writeCacheBestEffort(
+  env: ScoreSandboxEnv,
+  spec: InstallSpec,
+  value: { scorecard: unknown; anc_version: string },
+): Promise<void> {
+  if (!env.SCORE_CACHE) {
+    console.log(JSON.stringify({ scope: 'cache.write', skipped: 'no_binding' }));
+    return;
+  }
+  const toolVersion = extractToolVersion(value.scorecard);
+  if (!toolVersion) {
+    console.log(JSON.stringify({ scope: 'cache.write', skipped: 'no_tool_version', binary: spec.binary }));
+    return;
+  }
+  // SPEC_VERSION is the proxy for anc-version in the cache key. The
+  // cached payload still carries the exec-captured anc_version as data
+  // — the key vs. payload split is intentional. See cache.ts module
+  // header for the full rationale.
+  const key = cache.keyFor(spec.binary, SPEC_VERSION);
+  try {
+    await cache.put(
+      { SCORE_CACHE: env.SCORE_CACHE },
+      key,
+      value.scorecard,
+      value.anc_version,
+      toolVersion,
+      SPEC_VERSION,
+    );
+  } catch (err) {
+    // cache.put only throws on refusal-to-cache-half-state (missing
+    // version), which the guards above already cover. Defense-in-depth:
+    // a future regression that bypasses those guards still doesn't
+    // surface to the user.
+    console.log(JSON.stringify({ scope: 'cache.write', error: err instanceof Error ? err.message : String(err) }));
+  }
+}
+
+// Pulls `scorecard.tool.version` if present. The shape is the anc
+// JSON envelope; the field is populated by `anc check` from whatever
+// version flag the tool exposes. Unknown values bail out so cache.put's
+// refusal-to-cache-half-state isn't reached at runtime. Exported for
+// the same unit-test reason as writeCacheBestEffort.
+export function extractToolVersion(scorecard: unknown): string | null {
+  if (typeof scorecard !== 'object' || scorecard === null) return null;
+  const tool = (scorecard as { tool?: unknown }).tool;
+  if (typeof tool !== 'object' || tool === null) return null;
+  const version = (tool as { version?: unknown }).version;
+  if (typeof version !== 'string' || version.length === 0) return null;
+  return version;
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+function json(payload: unknown, status: number): Response {
+  return new Response(JSON.stringify(payload), {
+    status,
+    headers: { 'content-type': 'application/json' },
+  });
+}
+
+function statusFor(error: string): number {
+  switch (error) {
+    case 'chain_resolved_install_failed':
+    case 'chain_resolved_no_binary_produced':
+    case 'install_unsupported':
+    case 'anc_check_failed':
+      return 502;
+    case 'timeout':
+      return 504;
+    case 'chain_no_resolve':
+      return 404;
+    case 'anc_version_unreadable':
+      return 500;
+    default:
+      return 500;
   }
 }
diff --git a/src/worker/score/github-accessibility.ts b/src/worker/score/github-accessibility.ts
new file mode 100644
index 0000000..a802162
--- /dev/null
+++ b/src/worker/score/github-accessibility.ts
@@ -0,0 +1,123 @@
+// Cheap pre-DO probe for github-url inputs: HEAD https://github.com/<owner>/<repo>.
+// 200/2xx means the repo is anonymously visible — proceed to the DO. 404 means
+// the repo is private, deleted, or never existed — fast-fail BEFORE the DO
+// dispatch so the user doesn't pay a sandbox cold-start (and the platform
+// doesn't pay container minutes) on a request that cannot resolve a binary
+// regardless. Anything else (5xx, network error, non-redirect non-404) is
+// treated as "unknown" and fails-OPEN so a transient github outage doesn't
+// silently break scoring.
+//
+// Redirect handling: github 301s for renamed repos (the redirect points at
+// the canonical owner/repo on github.com — fine, that's still "accessible").
+// But following redirects unconditionally would let a malicious upstream
+// pivot the probe to an arbitrary host. We use `redirect: 'manual'` and
+// treat any 3xx as "accessible" without inspecting Location — github's own
+// 301s for moves all land on github.com anyway, and we don't need the
+// target URL, just the binary "is this fetchable" answer.
+//
+// In-isolate cache: a Map keyed by `<owner>/<repo>` (lowercased) with a
+// timestamp-based TTL. Workers re-instantiate isolates frequently, so the
+// cache is bounded; the TTL exists so a private→public flip is observed
+// within ~5 min on a long-lived isolate.
+
+const PROBE_TIMEOUT_MS = 3000;
+const CACHE_TTL_MS = 5 * 60 * 1000;
+
+// Owner+repo shape lock applied independently here, even though validate.ts
+// already enforces the same character classes at the Worker boundary. This
+// is defense-in-depth against a future caller that bypasses validate.ts and
+// hands a raw string to this module: a missing guard here would let
+// arbitrary characters interpolate into the URL passed to fetch(). The
+// regexes mirror GitHub's own owner + repo rules.
+const OWNER_RE = /^[A-Za-z0-9](?:[A-Za-z0-9-]{0,38})$/;
+const REPO_RE = /^[A-Za-z0-9._-]{1,100}$/;
+
+export type AccessibilityResult =
+  | { state: 'accessible' }
+  | { state: 'not_accessible' }
+  | { state: 'unknown'; reason: 'timeout' | 'network_error' | 'non_2xx_non_404' | 'invalid_slug' };
+
+export type CheckOpts = {
+  /** Injectable for tests; defaults to globalThis.fetch. */
+  fetcher?: typeof fetch;
+  /** Override the default 3 s probe timeout. */
+  timeoutMs?: number;
+};
+
+type CacheEntry = { result: AccessibilityResult; expiresAt: number };
+
+// Module-scoped cache. Bounded by isolate lifetime + TTL. We don't bother
+// with an LRU eviction because the working set on a public-binary scorer is
+// dominated by the same ~hundred repos across requests; an unbounded Map of
+// owner/repo keys on a per-isolate basis stays well under any sensible
+// memory ceiling.
+const cache = new Map<string, CacheEntry>();
+
+/** Test-only: drop the in-isolate cache between tests. */
+export function _resetAccessibilityCache(): void {
+  cache.clear();
+}
+
+export async function checkGithubAccessibility(
+  owner: string,
+  repo: string,
+  opts: CheckOpts = {},
+): Promise<AccessibilityResult> {
+  // Defense-in-depth: refuse to interpolate anything we wouldn't accept at
+  // validate.ts. An invalid slug here means a caller bypassed the validator;
+  // we return `unknown` rather than `not_accessible` so the caller fails
+  // OPEN (the DO will run its own validation and bounce with the right
+  // error). The bonus is no spurious HEAD probes against malformed URLs.
+  if (!OWNER_RE.test(owner) || !REPO_RE.test(repo)) {
+    return { state: 'unknown', reason: 'invalid_slug' };
+  }
+
+  const key = `${owner.toLowerCase()}/${repo.toLowerCase()}`;
+  const now = Date.now();
+  const cached = cache.get(key);
+  if (cached && cached.expiresAt > now) return cached.result;
+
+  const fetcher = opts.fetcher ?? globalThis.fetch.bind(globalThis);
+  const timeout = opts.timeoutMs ?? PROBE_TIMEOUT_MS;
+  const ctrl = new AbortController();
+  const t = setTimeout(() => ctrl.abort(), timeout);
+
+  let result: AccessibilityResult;
+  try {
+    const res = await fetcher(`https://github.com/${owner}/${repo}`, {
+      method: 'HEAD',
+      // Manual redirects: a github 30x for a renamed repo means the repo
+      // exists (just moved); we treat that as accessible without
+      // dereferencing the Location header. This blocks a hypothetical
+      // pivot where github 30x'd to a non-github host (won't happen for
+      // real github traffic, but the manual mode makes the property
+      // structural rather than trust-based).
+      redirect: 'manual',
+      signal: ctrl.signal,
+      headers: { 'User-Agent': 'anc.dev-accessibility-probe/1' },
+    });
+    if (res.status === 404) {
+      result = { state: 'not_accessible' };
+    } else if (res.status >= 200 && res.status < 400) {
+      // 2xx + 3xx both mean the repo is reachable. We don't follow the
+      // 30x to confirm; see redirect comment above.
+      result = { state: 'accessible' };
+    } else {
+      // 5xx, 401, 403, 429, etc. Fail-open: the DO will run its own probe
+      // and produce an honest error code if the repo really is broken.
+      result = { state: 'unknown', reason: 'non_2xx_non_404' };
+    }
+  } catch (err) {
+    // AbortError when the timeout fires; everything else collapses to
+    // network_error. In both cases the caller fails-open and dispatches
+    // the DO. Differentiating timeout vs. network helps log analysis
+    // without changing behavior.
+    const reason = err instanceof DOMException && err.name === 'AbortError' ? 'timeout' : 'network_error';
+    result = { state: 'unknown', reason };
+  } finally {
+    clearTimeout(t);
+  }
+
+  cache.set(key, { result, expiresAt: now + CACHE_TTL_MS });
+  return result;
+}
diff --git a/src/worker/score/handler.ts b/src/worker/score/handler.ts
new file mode 100644
index 0000000..6668cd3
--- /dev/null
+++ b/src/worker/score/handler.ts
@@ -0,0 +1,1072 @@
+// /api/score request handler — orchestrates the live-scoring pipeline.
+//
+// Pipeline (post 2026-05-20 gates-before-discovery reorder):
+//
+//   1. Validate input.
+//   2. Unified scorecard lookup — pre-discovery. One call to
+//      lookupScorecard() collapses the registry-fast-path and the R2
+//      cache pre-check into a single tier-resolved decision. `curated`
+//      returns the registry-hit envelope pointing at /score/<slug>;
+//      `cached` returns the inline scorecard JSON; both bypass the
+//      metered gates (kill-switch, Turnstile, rate-limit, DO) — cached
+//      scorecards are functionally identical to curated ones (no
+//      sandbox cost). `miss` falls through to the live path.
+//
+//      The pre-discovery cache key is keyed by whatever binary is
+//      cheaply derivable from input alone: install-command's
+//      `spec.binary`, or a hinted github-url's `hint.binary`. A
+//      github-url WITHOUT a hint has no binary upfront — that case
+//      always misses here and falls through to discovery (step 6),
+//      after which step 6.5 re-checks the cache with the resolved
+//      binary.
+//   3. GET requests stop after step 2: GET is the paste-and-share /
+//      bookmark read-only contract. A miss returns 404 chain_no_resolve.
+//      GET never consults gates and never reaches discovery or the DO.
+//   4. [METERED GATES — POST only, after registry+cache miss.]
+//      a. Kill switch (`scoring_disabled` in SCORE_KV; isolate-cached KV
+//         read) — 503 + Retry-After. Cheapest gate, ordered first so a
+//         flipped switch denies before any external network call.
+//      b. Turnstile siteverify — 400 turnstile_failed on miss. External
+//         call (~50-200ms) to challenges.cloudflare.com; the bot-defense
+//         layer that guards everything below it.
+//      c. Rate limit on `<session-id>:<sha256(input)>` (SCORE_LIMITER)
+//         plus a coarse per-IP fallback (SCORE_LIMITER_IP). 429 with
+//         Retry-After.
+//      The gates fire BEFORE any outbound that costs us money or a
+//      third-party quota (steps 5 and 6). An unauthenticated caller
+//      cannot fan out the discovery chain at zero rate-limit cost.
+//   5. GitHub accessibility pre-check (POST + github-url + no branch +
+//      no hint) — single HEAD against github.com. Fast-fail private/
+//      inaccessible repos as github_repo_not_accessible before the
+//      ~5-call discovery fan-out. Lives AFTER the metered gates: the
+//      probe is cheap but it's still an outbound, and gates apply
+//      uniformly to every external call discovery would make.
+//   6. Resolve InstallSpec (resolve-spec.ts). The Worker runs the
+//      discovery chain (api.github.com releases, brew/crates/npm/pypi/
+//      go, README parse) + brew/go fallbacks. A `chain_no_resolve` /
+//      `install_unsupported` / `invalid_url_path` result bounces HERE
+//      — no DO dispatch, no compute billed. The bounces land AFTER the
+//      metered gates so an attacker cannot DoS the discovery layer
+//      (~5 parallel registry calls + GitHub Releases per request) at
+//      zero rate-limit cost.
+//   6.5. Unified scorecard lookup — post-discovery cache. Discovery now
+//      knows `spec.binary`, so for github-url-without-hint inputs that
+//      missed at step 2 we can re-check the cache with the resolved
+//      binary before paying the DO container cost. Same cache binding,
+//      same key shape (`scores/<binary>/<SPEC_VERSION>.json`) as step
+//      2 — readers and writers can't drift. Skipped for
+//      `git-clone` specs (branch-scoped, ephemeral, never cached) and
+//      when `?fromCache=false` is set. A hit here is wire-indistinguish-
+//      able from a step-2 cache hit: same `freshness: 'cache-hit'`,
+//      same `Cache-Control: public, max-age=300`. Both bypass the DO.
+//   7. DO call with the RESOLVED InstallSpec ({spec, hash} body).
+//      Pre-2026-05-20 the DO received `{input, hash}` and did its own
+//      discovery; the move drops a duplicate `loadHintsIndex` and lets
+//      no-resolve requests skip the container entirely. On success the
+//      DO writes to SCORE_CACHE itself (do.ts), so the next request
+//      for the same binary short-circuits at step 2's cache tier
+//      (when the binary is derivable from input) or at step 6.5's
+//      post-discovery re-check (when it isn't).
+//
+// `?fromCache=false` operator escape hatch: skips BOTH the pre-discovery
+// (step 2) and post-discovery (step 6.5) cache read tiers. The curated
+// registry is still consulted, and the cache WRITE after a live run still
+// fires. Useful when "did the registry version just update?" needs an
+// authoritative re-score.
+//
+// Telemetry: one structured log line per request, `scope: 'score.tier'`,
+// captures which tier served the response (`curated` | `cache_pre` |
+// `cache_post` | `live` | `error_<code>`) plus per-tier attempt + hit
+// flags so we can later query "what percentage of cache hits came from
+// pre vs post discovery?" via the observability binding. Not exposed in
+// the response body — operational signal only.
+//
+// GET / POST split:
+//   - GET  /api/score(.md|.json)?input=…  read-only. Registry-fast-path
+//                                          only; non-registry input
+//                                          returns 404 chain_no_resolve.
+//                                          Used by docs links + bookmark
+//                                          paste-and-share UX.
+//   - POST /api/score(.md|.json)          { input, turnstile_token? }
+//                                          full pipeline.
+//
+// Other methods → 405.
+
+import type { Container } from '@cloudflare/containers';
+import { getRandom } from '@cloudflare/containers';
+import { detectScorePreference } from '../accept';
+import { CHECKER_URL, SPEC_VERSION } from '../spec-version.gen';
+import type { CacheEnv } from './cache';
+import * as cache from './cache';
+import type { ResolvedStep } from './discover-binary';
+import { checkGithubAccessibility } from './github-accessibility';
+import { isScoringDisabled, type KillSwitchEnv } from './kill-switch';
+import {
+  type DiscoveryHintsIndex,
+  deriveShareBinary,
+  lookupRegistry,
+  lookupScorecard,
+  type RegistryIndex,
+} from './registry-lookup';
+import { resolveSpec } from './resolve-spec';
+import { CTA, type ScoreError, shapeScoreError, shapeScoreSuccess, statusForError } from './response-shape';
+import { issue, newSession, read as readSession, SessionConfigError, type SessionEnv } from './session';
+import {
+  type FreshnessTag,
+  type InputKindTag,
+  type PmTag,
+  recordScoreEvent,
+  type ScoreEventFields,
+  type ScoreTelemetryEnv,
+} from './telemetry';
+import { TurnstileConfigError, type TurnstileEnv, verifyTurnstile } from './turnstile';
+import { type ValidatedInput, validateInput } from './validate';
+
+// Sandbox DO instance pool size. Must match `max_instances` in
+// wrangler.jsonc `containers[]` so getRandom's hash space lines up with
+// the CF Containers app config — under-shooting wastes provisioned
+// capacity; over-shooting picks IDs that don't have a container.
+const MAX_INSTANCES = 10;
+
+// ---------------------------------------------------------------------------
+// Env contract
+// ---------------------------------------------------------------------------
+
+export type ScoreEnv = KillSwitchEnv &
+  SessionEnv &
+  TurnstileEnv &
+  CacheEnv &
+  ScoreTelemetryEnv & {
+    ASSETS: Fetcher;
+    // Optional because a mid-rollback Worker (between v2-drop-sandbox
+    // and v3-restore-sandbox) deploys cleanly without the SCORE binding.
+    // The binding-presence guard before the DO call returns a typed 503
+    // sandbox_unavailable; without it `getRandom(env.SCORE, ...)` throws
+    // and surfaces as Cloudflare error 1101.
+    SCORE?: DurableObjectNamespace;
+    SCORE_LIMITER: RateLimit;
+    SCORE_LIMITER_IP?: RateLimit;
+  };
+
+export interface RateLimit {
+  limit(options: { key: string }): Promise<{ success: boolean }>;
+}
+
+// ---------------------------------------------------------------------------
+// Registry / hints index loading. Cached at module scope across invocations
+// in the same isolate (Workers re-instantiate isolates frequently, so this
+// is bounded and recovers from build-deploy drift within seconds).
+// ---------------------------------------------------------------------------
+
+let registryIndexPromise: Promise<RegistryIndex> | null = null;
+let hintsIndexPromise: Promise<DiscoveryHintsIndex> | null = null;
+
+async function fetchAssetJson<T>(env: ScoreEnv, path: string): Promise<T> {
+  const res = await env.ASSETS.fetch(new Request(`https://assets.internal${path}`));
+  if (!res.ok) throw new Error(`asset fetch failed: ${path} (status ${res.status})`);
+  return (await res.json()) as T;
+}
+
+function loadRegistryIndex(env: ScoreEnv): Promise<RegistryIndex> {
+  if (!registryIndexPromise) {
+    registryIndexPromise = fetchAssetJson<RegistryIndex>(env, '/registry-index.json').catch((err) => {
+      registryIndexPromise = null;
+      throw err;
+    });
+  }
+  return registryIndexPromise;
+}
+
+function loadHintsIndex(env: ScoreEnv): Promise<DiscoveryHintsIndex> {
+  if (!hintsIndexPromise) {
+    hintsIndexPromise = fetchAssetJson<DiscoveryHintsIndex>(env, '/discovery-hints-index.json').catch((err) => {
+      hintsIndexPromise = null;
+      throw err;
+    });
+  }
+  return hintsIndexPromise;
+}
+
+/** Test-only — drop in-memory index caches. */
+export function _resetIndexCache(): void {
+  registryIndexPromise = null;
+  hintsIndexPromise = null;
+}
+
+// ---------------------------------------------------------------------------
+// Telemetry — per-request tier accumulator.
+//
+// One structured log line per request, scope `score.tier`, captures which
+// tier served the response and the pre/post-discovery cache attempt+hit
+// flags so operators can later query "what percentage of cache hits came
+// from pre vs post discovery?" via the observability binding. NOT exposed
+// in the response body — operational signal, not part of the
+// spec_version + anc_version + checker_url response contract.
+//
+// `tier` records the resolution branch that produced the response:
+//   - `curated`     — registry-fast-path hit
+//   - `cache_pre`   — step 2 R2 cache hit (binary derivable from input)
+//   - `cache_post`  — step 6.5 R2 cache hit (binary discovered, then re-checked)
+//   - `live`        — DO dispatched and returned success
+//   - `error_<code>`— terminal error (validation, gate denial, no-resolve, etc.)
+//
+// The accumulator is mutated as the pipeline progresses; the single log
+// line is emitted in a try/finally so every code path reports.
+// ---------------------------------------------------------------------------
+
+type Telemetry = {
+  tier: string;
+  cache_pre_attempted: boolean;
+  cache_pre_hit: boolean;
+  cache_post_attempted: boolean;
+  cache_post_hit: boolean;
+  binary: string | null;
+  input_kind: string | null;
+  // U10 Analytics Engine fields — see telemetry.ts for the blob/double
+  // slot map. Captured here as the pipeline advances; folded into a
+  // single writeDataPoint call in handleScore's finally block.
+  pm: PmTag | null;
+  freshness: FreshnessTag | null;
+  resolved_step: ResolvedStep | 'registry' | null;
+  install_ms: number | null;
+  anc_check_ms: number | null;
+};
+
+function newTelemetry(): Telemetry {
+  return {
+    tier: 'unset',
+    cache_pre_attempted: false,
+    cache_pre_hit: false,
+    cache_post_attempted: false,
+    cache_post_hit: false,
+    binary: null,
+    input_kind: null,
+    pm: null,
+    freshness: null,
+    resolved_step: null,
+    install_ms: null,
+    anc_check_ms: null,
+  };
+}
+
+function emitTelemetry(t: Telemetry): void {
+  console.log(
+    JSON.stringify({
+      scope: 'score.tier',
+      tier: t.tier,
+      cache_pre_attempted: t.cache_pre_attempted,
+      cache_pre_hit: t.cache_pre_hit,
+      cache_post_attempted: t.cache_post_attempted,
+      cache_post_hit: t.cache_post_hit,
+      binary: t.binary,
+      input_kind: t.input_kind,
+    }),
+  );
+}
+
+// Map the in-handler Telemetry shape into the AE writeDataPoint
+// payload. Pure function so the telemetry-regression test can pin
+// every slot's derivation. blob1 maps ValidatedInput.kind ('slug' |
+// 'install-command' | 'github-url' | 'unknown') onto the AE input-
+// kind union — 'slug' becomes 'registry' because validate.ts only
+// emits 'slug' for inputs that matched the by_slug index. Error
+// codes are derived by stripping the `error_` prefix the in-handler
+// tier string carries; non-error tiers (curated / cache_pre /
+// cache_post / live / unset) return null in blob3.
+function buildScoreEventFields(t: Telemetry, totalMs: number, status: number): ScoreEventFields {
+  const errorCode = t.tier.startsWith('error_') ? (t.tier.slice('error_'.length) as ScoreError['code']) : null;
+  return {
+    input_kind: mapInputKind(t.input_kind),
+    pm: t.pm,
+    error_code: errorCode,
+    freshness: t.freshness,
+    resolved_step: t.resolved_step,
+    total_ms: totalMs,
+    install_ms: t.install_ms,
+    anc_check_ms: t.anc_check_ms,
+    response_status: status,
+    tool: t.binary,
+  };
+}
+
+function mapInputKind(kind: string | null): InputKindTag | null {
+  switch (kind) {
+    case 'slug':
+      return 'registry';
+    case 'install-command':
+      return 'install-command';
+    case 'github-url':
+      return 'github-url';
+    case 'unknown':
+      return 'invalid';
+    default:
+      return null;
+  }
+}
+
+// ---------------------------------------------------------------------------
+// Handler
+// ---------------------------------------------------------------------------
+
+const CTA_INSTALL_ANC = CTA.installAnc;
+
+export async function handleScore(request: Request, env: ScoreEnv): Promise<Response> {
+  const telemetry = newTelemetry();
+  const start = Date.now();
+  let response: Response | undefined;
+  try {
+    response = await handleScoreInner(request, env, telemetry);
+    return response;
+  } finally {
+    const totalMs = Date.now() - start;
+    // Response missing means handleScoreInner threw — treat as 500 for
+    // the AE row so the error-code distribution still sees the
+    // unhandled-exception class as 5xx rather than a missing value.
+    const status = response?.status ?? 500;
+    emitTelemetry(telemetry);
+    recordScoreEvent(env, buildScoreEventFields(telemetry, totalMs, status));
+  }
+}
+
+async function handleScoreInner(request: Request, env: ScoreEnv, telemetry: Telemetry): Promise<Response> {
+  const url = new URL(request.url);
+  const method = request.method.toUpperCase();
+  const preference = preferenceForResponse(url.pathname, request);
+
+  if (method !== 'GET' && method !== 'POST') {
+    telemetry.tier = 'error_unrecognized_input';
+    return shapeWithPreference(
+      shapeScoreError({
+        code: 'unrecognized_input',
+        cta_text: 'Use GET /api/score?input=… or POST /api/score {input}.',
+      }),
+      preference,
+      { status: 405 },
+    );
+  }
+
+  // 1. Parse + validate input.
+  let rawInput: string | null;
+  let turnstileToken: string | null = null;
+  if (method === 'POST') {
+    const parsed = await parsePostBody(request);
+    if (!parsed.ok) {
+      telemetry.tier = 'error_unrecognized_input';
+      return shapeWithPreference(
+        shapeScoreError({
+          code: 'unrecognized_input',
+          cta_text: 'POST body must be JSON {"input": "...", "turnstile_token?": "..."}',
+        }),
+        preference,
+      );
+    }
+    rawInput = parsed.input;
+    turnstileToken = parsed.turnstile_token;
+  } else {
+    rawInput = url.searchParams.get('input');
+  }
+
+  if (!rawInput) {
+    telemetry.tier = 'error_unrecognized_input';
+    return shapeWithPreference(shapeScoreError({ code: 'unrecognized_input', cta_text: CTA_INSTALL_ANC }), preference);
+  }
+
+  const registryIndex = await loadRegistryIndex(env);
+  const hintsIndex = await loadHintsIndex(env);
+
+  const validated = validateInput(rawInput, registryIndex);
+  // Set input_kind before the early-return so AE blob1 records `invalid`
+  // for validation rejects rather than leaving the field null.
+  telemetry.input_kind = validated.kind;
+  if (validated.kind === 'unknown') {
+    telemetry.tier = `error_${validated.error}`;
+    return shapeWithPreference(shapeScoreError(validationErrorFor(validated.error, rawInput)), preference);
+  }
+
+  // 2. Unified scorecard lookup — registry tier first, then R2 cache
+  //    tier when the binary is cheaply derivable. Both hit kinds are
+  //    unmetered (R6 extended to cached scorecards).
+  //
+  //    `?fromCache=false` skips the R2 read tier so an operator can
+  //    force a fresh registry consult + live run. The cache WRITE
+  //    after the live run still fires (so the next request benefits).
+  //
+  //    Branch-on-github-url SKIPS the curated/cache tiers entirely.
+  //    Curated scorecards are scored against release artifacts, NOT
+  //    arbitrary branches; serving a curated scorecard for a branch
+  //    request would be misleading. The user asked for THIS branch —
+  //    respect that and live-score it. The cache write after the live
+  //    run is also skipped (the live path passes the branch into the
+  //    git clone; caching under the bare binary name would clobber
+  //    the default-branch scorecard).
+  const skipCache = url.searchParams.get('fromCache') === 'false';
+  const isBranchScopedUrl = validated.kind === 'github-url' && typeof validated.branch === 'string';
+  // Pre-discovery cache attempt is recorded for any non-branch input
+  // that didn't opt out via ?fromCache=false. Whether the attempt
+  // results in a hit depends on lookupScorecard's tier-2 path —
+  // install-command and hinted github-url paste have a binary upfront
+  // and reach the cache read; github-url-without-hint silently skips
+  // the R2 read inside lookupScorecard (no binary derivable). We treat
+  // "attempted" as the policy intent (we WOULD have looked it up if a
+  // binary were available) rather than the wire fact, so the field
+  // stays useful for the "what percentage of cache hits came from
+  // round-1 vs round-2?" question even when the round-1 read was a
+  // structural no-op.
+  if (!isBranchScopedUrl && !skipCache) {
+    telemetry.cache_pre_attempted = true;
+  }
+  const lookup = isBranchScopedUrl
+    ? ({ kind: 'miss' } as const)
+    : await lookupScorecard(validated, env, registryIndex, hintsIndex, {
+        specVersion: SPEC_VERSION,
+        skipCache,
+      });
+
+  if (lookup.kind === 'curated') {
+    telemetry.tier = 'curated';
+    telemetry.binary = lookup.entry.binary ?? null;
+    telemetry.freshness = 'registry-hit';
+    telemetry.resolved_step = 'registry';
+    return shapeWithPreference(
+      shapeScoreSuccess(
+        {
+          kind: 'registry_hit',
+          tool: lookup.entry,
+          scorecard_url: lookup.scorecard_url,
+          // Surface the curated score so the homepage form can render a
+          // "Curated · N% pass rate" reward inline before the redirect.
+          // null when the registry entry predates the score_pct
+          // enrichment (gracefully degrades on the client).
+          score_pct: typeof lookup.entry.score_pct === 'number' ? lookup.entry.score_pct : null,
+        },
+        lookup.anc_version,
+        'cache-hit',
+      ),
+      preference,
+    );
+  }
+
+  if (lookup.kind === 'cached') {
+    telemetry.tier = 'cache_pre';
+    telemetry.cache_pre_hit = true;
+    telemetry.freshness = 'cache-hit';
+    const shareUrl = shareUrlForInput(validated, hintsIndex);
+    telemetry.binary = shareUrl ? shareUrl.replace(/^\/score\/live\//, '') : null;
+    return shapeWithPreference(
+      shapeScoreSuccess(lookup.scorecard, lookup.anc_version, 'cache-hit', shareUrl),
+      preference,
+    );
+  }
+
+  // GET requests stop after the read-only tiers: paste-and-share contract.
+  if (method === 'GET') {
+    telemetry.tier = 'error_chain_no_resolve';
+    return shapeWithPreference(shapeScoreError({ code: 'chain_no_resolve', cta_text: CTA_INSTALL_ANC }), preference);
+  }
+
+  // 4. Metered gates — kill-switch, Turnstile, rate-limit. These fire
+  //    BEFORE any cost-bearing outbound (the GitHub HEAD probe at step 5
+  //    and the discovery fan-out at step 6). Discovery alone can issue
+  //    5+ parallel HTTPS calls (brew/crates/npm/pypi/go/GitHub Releases/
+  //    README); without gates ahead of it, an unauthenticated caller
+  //    could fire the fan-out at zero rate-limit cost and burn through
+  //    third-party quotas (notably api.github.com's 60/hr unauthenticated
+  //    cap, pooled across Cloudflare egress IPs).
+  //
+  //    The R6 unmetered contract is preserved because curated + cache
+  //    hits short-circuit at step 2 — they never reach this block. Only
+  //    POSTs that missed both read-only tiers pay these gates.
+  //
+  //    Gate ordering inside this block is by ascending cost:
+  //      a. kill-switch  — KV read with isolate-level cache (cheapest)
+  //      b. Turnstile    — external siteverify call (~50-200ms)
+  //      c. rate-limit   — bindings call (cheap but mints session first)
+  //    A flipped kill switch denies before any external network call,
+  //    so a kill-switched Worker can't be used to flood siteverify or
+  //    the limiter even at zero score-handler cost.
+
+  // 4a. Kill switch (operator flip).
+  if (await isScoringDisabled(env)) {
+    telemetry.tier = 'error_scoring_disabled';
+    return shapeWithPreference(shapeScoreError({ code: 'scoring_disabled', cta_text: CTA_INSTALL_ANC }), preference);
+  }
+
+  // 4b. Turnstile siteverify. Misconfigured env (no secret) is a fail-fast
+  // 500 — the route MUST NOT accept POST traffic with the bot-defense
+  // layer disabled.
+  let verifyResult: Awaited<ReturnType<typeof verifyTurnstile>>;
+  try {
+    verifyResult = await verifyTurnstile(env, turnstileToken, {
+      remoteIp: request.headers.get('cf-connecting-ip') ?? undefined,
+    });
+  } catch (err) {
+    telemetry.tier = 'error_service_misconfigured';
+    return shapeWithPreference(serviceMisconfigured(err), preference);
+  }
+
+  if (!verifyResult.ok) {
+    if (verifyResult.reason === 'misconfigured') {
+      telemetry.tier = 'error_service_misconfigured';
+      return shapeWithPreference(serviceMisconfigured('TURNSTILE_SECRET missing'), preference);
+    }
+    telemetry.tier = 'error_turnstile_failed';
+    return shapeWithPreference(shapeScoreError({ code: 'turnstile_failed', cta_text: CTA_INSTALL_ANC }), preference);
+  }
+
+  // 4c. Session cookie + rate limit. Fresh session is minted on first
+  //   passing-Turnstile request; subsequent requests reuse it via cookie.
+  let session: { sid: string } | null;
+  let setCookie: string | null = null;
+  try {
+    session = await readSession(env, request);
+    if (!session) {
+      const fresh = newSession();
+      setCookie = await issue(env, fresh);
+      session = fresh;
+    }
+  } catch (err) {
+    if (err instanceof SessionConfigError) {
+      telemetry.tier = 'error_service_misconfigured';
+      return shapeWithPreference(serviceMisconfigured('SESSION_HMAC_SECRET missing'), preference);
+    }
+    throw err;
+  }
+
+  const inputHash = await sha256(rawInput);
+  const limiterKey = `${session.sid}:${inputHash}`;
+
+  const limited = await env.SCORE_LIMITER.limit({ key: limiterKey });
+  if (!limited.success) {
+    telemetry.tier = 'error_rate_limited';
+    return shapeWithPreference(
+      shapeScoreError({ code: 'rate_limited', retry_after: 60, cta_text: CTA_INSTALL_ANC }),
+      preference,
+      { setCookie },
+    );
+  }
+
+  // Coarse per-IP fallback: a session that swaps cookies still gets capped.
+  if (env.SCORE_LIMITER_IP) {
+    const ipKey = request.headers.get('cf-connecting-ip') ?? 'unknown';
+    const ipLimited = await env.SCORE_LIMITER_IP.limit({ key: ipKey });
+    if (!ipLimited.success) {
+      telemetry.tier = 'error_rate_limited';
+      return shapeWithPreference(
+        shapeScoreError({ code: 'rate_limited', retry_after: 60, cta_text: CTA_INSTALL_ANC }),
+        preference,
+        { setCookie },
+      );
+    }
+  }
+
+  // 5. GitHub accessibility pre-check. For github-url inputs without a
+  //    hint and without an explicit branch, probe github.com directly
+  //    with a HEAD before paying the discovery fan-out (and any
+  //    downstream DO cold-start cost). A 404 from github means the repo
+  //    is private, deleted, or never existed — discovery can't resolve
+  //    a binary regardless. Fast-fail with `github_repo_not_accessible`
+  //    so the user sees an honest "we can't see that repo" panel rather
+  //    than a generic `chain_no_resolve` after several upstream-API
+  //    round-trips.
+  //
+  //    The probe runs AFTER the metered gates because it's an outbound
+  //    HTTPS call, and the gate ordering principle is uniform: every
+  //    cost-bearing fetch (HEAD probe, discovery fan-out, DO dispatch)
+  //    sits behind the same kill-switch / Turnstile / rate-limit
+  //    boundary. The ~50-300ms HEAD is a fast-fail that lives one tier
+  //    away from discovery, not pre-gate.
+  //
+  //    Skip conditions (each is an information-preserving short-circuit):
+  //      - non-github-url input (slug / install-command — no repo to probe)
+  //      - github-url with explicit branch (the live path clones anyway;
+  //        HEAD on the repo root tells us nothing about the branch
+  //        existing)
+  //      - github-url that resolved to a hint (we already know the
+  //        install path; a transient github 404 here shouldn't break a
+  //        repo we've explicitly curated install metadata for)
+  //
+  //    Fail-OPEN on anything other than a clean 404: 5xx, network
+  //    timeout, abort all fall through to discovery so a github outage
+  //    doesn't silently break scoring. The accessibility module's
+  //    in-isolate cache absorbs repeated probes for the same repo.
+  if (validated.kind === 'github-url' && !validated.branch) {
+    const registryHit = lookupRegistry(validated, registryIndex, hintsIndex);
+    if (registryHit.kind !== 'hint') {
+      const accessibility = await checkGithubAccessibility(validated.owner, validated.repo);
+      if (accessibility.state === 'not_accessible') {
+        telemetry.tier = 'error_github_repo_not_accessible';
+        return shapeWithPreference(
+          shapeScoreError({
+            code: 'github_repo_not_accessible',
+            cta_text: CTA_INSTALL_ANC,
+          }),
+          preference,
+          { setCookie },
+        );
+      }
+    }
+  }
+
+  // 6. Resolve InstallSpec. Pre-2026-05-20 this happened inside the DO;
+  //    moving it to the Worker means a `chain_no_resolve` paste (e.g.
+  //    brettdavies/dotfiles) bounces here in ~200 ms instead of spinning
+  //    up a container to discover the same fact. The brew/go fallbacks
+  //    live here too — they share the discovery chain's fetcher, so a
+  //    single `globalThis.fetch` covers every outbound this step makes
+  //    (tests inject via globalThis.fetch on the request boundary;
+  //    production runs on Cloudflare's fetch).
+  //
+  //    Failure here exits the pipeline AFTER the metered gates have
+  //    already cleared. The discovery fan-out is the most expensive
+  //    cost-bearing operation on the live path (~5 parallel registry
+  //    calls + GitHub Releases) and the gates exist precisely to keep
+  //    unauthenticated traffic from firing it. A no-resolve still ate
+  //    one rate-limit slot and one Turnstile siteverify — that's the
+  //    designed behavior, not a leak.
+  const resolution = await resolveSpec(validated, hintsIndex);
+  if (!resolution.ok) {
+    telemetry.tier = `error_${resolution.error}`;
+    return shapeWithPreference(resolutionErrorToResponse(resolution.error, resolution.details), preference, {
+      setCookie,
+    });
+  }
+  const spec = resolution.spec;
+  telemetry.binary = spec.binary;
+  telemetry.pm = spec.pm;
+  telemetry.resolved_step = resolution.resolved_step ?? null;
+
+  // 6.5. Post-discovery cache lookup. Discovery now knows `spec.binary`,
+  //      which the step-2 pre-discovery check couldn't derive for
+  //      github-url-without-hint inputs. Re-check the cache with the
+  //      resolved binary before paying the DO container cost.
+  //
+  //      Same cache binding, same key shape as step 2 — readers and
+  //      writers can't drift. A hit here is wire-indistinguishable from
+  //      a step-2 hit (same `freshness: 'cache-hit'`, same Cache-Control
+  //      `public, max-age=300`); both bypass the DO.
+  //
+  //      Skip conditions:
+  //        - `spec.pm === 'git-clone'`: branch-scoped scores aren't
+  //          cached (no share_url, ephemeral). Caching under the bare
+  //          binary name would clobber the default-branch scorecard,
+  //          so the live path skips the write too and this read has
+  //          nothing meaningful to consult.
+  //        - `skipCache` (?fromCache=false): the operator escape hatch
+  //          is documented as "do not consult any cache, force a live
+  //          run" — applies uniformly to both round-1 and round-2.
+  //
+  //      Telemetry: `cache_post_attempted` records whether we issued
+  //      the R2 read; `cache_post_hit` flips when the read returned a
+  //      payload. The combination lets us separate "we tried and the
+  //      cache was empty" from "we never tried" for hit-rate analysis.
+  if (spec.pm !== 'git-clone' && !skipCache) {
+    telemetry.cache_post_attempted = true;
+    const cached = await cache.get(env, cache.keyFor(spec.binary, SPEC_VERSION));
+    if (cached) {
+      telemetry.cache_post_hit = true;
+      telemetry.tier = 'cache_post';
+      telemetry.freshness = 'cache-hit';
+      const shareUrl = shareUrlForInput(validated, hintsIndex);
+      return shapeWithPreference(
+        shapeScoreSuccess(cached.scorecard, cached.anc_version, 'cache-hit', shareUrl),
+        preference,
+        { setCookie },
+      );
+    }
+  }
+
+  // 7. DO call — the DO now receives a resolved InstallSpec rather than
+  //    a raw ValidatedInput. The contract narrowed in the 2026-05-20
+  //    discovery-move; do.ts no longer fans out to the discovery chain
+  //    or runs brew/go fallbacks (those happen at step 6 above). The DO
+  //    returns either `{scorecard, anc_version}` on success or
+  //    `{error, details?}` on failure, mapped below into the typed
+  //    ScoreError union. The DO still writes successful scorecards to
+  //    SCORE_CACHE itself, so the next request for the same binary
+  //    short-circuits at step 2's cache tier.
+  //
+  // Pool of MAX_INSTANCES DO instances via getRandom. Each request
+  // picks a random instance — parallel load
+  // spreads across the pool instead of queuing serially behind a
+  // single container session. Critical for Show HN spike absorption
+  // (singleton bottlenecked at one exec at a time inside the SDK
+  // session, observed 2026-05-18; cold-start + parallel queue =
+  // cascading 60s timeouts).
+  //
+  // getRandom (from @cloudflare/containers) calls
+  // `binding.idFromName('instance-${0..N-1}')` + `binding.get(id)`. IDs
+  // are stable across requests so the same instance reuses its warm
+  // container session for subsequent requests routed to it.
+  //
+  // Binding-presence guard: a Worker version deployed mid-rollback
+  // (between v2-drop-sandbox and v3-restore-sandbox) has no SCORE
+  // binding. Without this check, getRandom() throws on the undefined
+  // namespace and surfaces as Cloudflare error 1101 (Worker exception).
+  if (!env.SCORE) {
+    telemetry.tier = 'error_sandbox_unavailable';
+    return shapeWithPreference(
+      shapeScoreError({ code: 'sandbox_unavailable', cta_text: CTA_INSTALL_ANC }),
+      preference,
+      { setCookie },
+    );
+  }
+  const stub = (await getRandom(
+    env.SCORE as unknown as DurableObjectNamespace<Container>,
+    MAX_INSTANCES,
+  )) as DurableObjectStub;
+  const doRes = await stub.fetch(
+    new Request('https://do.internal/score', {
+      method: 'POST',
+      body: JSON.stringify({ spec, hash: inputHash }),
+      headers: { 'content-type': 'application/json' },
+    }),
+  );
+
+  let doPayload: unknown;
+  try {
+    doPayload = await doRes.json();
+  } catch {
+    telemetry.tier = 'error_incomplete_response_contract';
+    return shapeWithPreference(
+      shapeScoreError({
+        code: 'incomplete_response_contract',
+        details: 'DO returned non-JSON',
+        cta_text: CTA_INSTALL_ANC,
+      }),
+      preference,
+      { setCookie },
+    );
+  }
+
+  // Defense-in-depth: if the binding ever points back at the legacy
+  // sandbox-stub class (botched rollback, misconfigured wrangler.jsonc)
+  // the user gets a
+  // typed 503 instead of a raw stub error envelope.
+  if (isStubError(doPayload)) {
+    telemetry.tier = 'error_sandbox_stub_until_u6';
+    return shapeWithPreference(
+      shapeScoreError({ code: 'sandbox_stub_until_u6', cta_text: CTA_INSTALL_ANC }),
+      preference,
+      { setCookie },
+    );
+  }
+
+  if (isDoError(doPayload)) {
+    telemetry.tier = `error_${doPayload.error}`;
+    return shapeWithPreference(mapDoError(doPayload), preference, { setCookie });
+  }
+
+  if (isDoSuccess(doPayload)) {
+    telemetry.tier = 'live';
+    telemetry.freshness = 'live';
+    telemetry.install_ms = typeof doPayload.install_ms === 'number' ? doPayload.install_ms : null;
+    telemetry.anc_check_ms = typeof doPayload.anc_check_ms === 'number' ? doPayload.anc_check_ms : null;
+    const shareUrl = shareUrlForInput(validated, hintsIndex);
+    return shapeWithPreference(
+      shapeScoreSuccess(doPayload.scorecard, doPayload.anc_version, 'live', shareUrl),
+      preference,
+      { setCookie },
+    );
+  }
+
+  // DO returned 2xx but with an unrecognized envelope shape. Fail loud
+  // rather than synthesize a partial success — better an honest 500
+  // than a response missing the spec_version / anc_version / checker_url
+  // triad.
+  telemetry.tier = 'error_incomplete_response_contract';
+  return shapeWithPreference(
+    shapeScoreError({
+      code: 'incomplete_response_contract',
+      details: 'DO returned unrecognized envelope shape',
+      cta_text: CTA_INSTALL_ANC,
+    }),
+    preference,
+    { setCookie },
+  );
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+type PostBody = { ok: true; input: string; turnstile_token: string | null } | { ok: false };
+
+async function parsePostBody(request: Request): Promise<PostBody> {
+  let body: unknown;
+  try {
+    body = await request.json();
+  } catch {
+    return { ok: false };
+  }
+  if (!body || typeof body !== 'object') return { ok: false };
+  const obj = body as Record<string, unknown>;
+  const input = typeof obj.input === 'string' ? obj.input : null;
+  const token = typeof obj.turnstile_token === 'string' ? obj.turnstile_token : null;
+  if (!input) return { ok: false };
+  return { ok: true, input, turnstile_token: token };
+}
+
+function preferenceForResponse(pathname: string, request: Request): 'json' | 'markdown' {
+  if (pathname.endsWith('.json')) return 'json';
+  if (pathname.endsWith('.md')) return 'markdown';
+  return detectScorePreference(request);
+}
+
+function shapeWithPreference(
+  jsonResponse: Response,
+  preference: 'json' | 'markdown',
+  opts: { status?: number; setCookie?: string | null } = {},
+): Response {
+  const status = opts.status ?? jsonResponse.status;
+  const headers = new Headers(jsonResponse.headers);
+  if (opts.setCookie) headers.append('Set-Cookie', opts.setCookie);
+
+  if (preference === 'json') {
+    return new Response(jsonResponse.body, { status, headers });
+  }
+
+  // Minimal markdown rendering — honors the content-negotiation
+  // contract; deeper polish lives in summary-render.ts.
+  // Reading the body twice for markdown rendering: clone the response.
+  return renderMarkdownVariant(jsonResponse, status, headers);
+}
+
+async function renderMarkdownVariantAsync(
+  jsonResponse: Response,
+  status: number,
+  baseHeaders: Headers,
+): Promise<Response> {
+  const payload = (await jsonResponse.json()) as Record<string, unknown>;
+  const md = renderJsonAsMarkdown(payload);
+  const headers = new Headers(baseHeaders);
+  headers.set('Content-Type', 'text/markdown; charset=utf-8');
+  return new Response(md, { status, headers });
+}
+
+function renderMarkdownVariant(jsonResponse: Response, status: number, baseHeaders: Headers): Response {
+  return new Response(
+    new ReadableStream({
+      async start(controller) {
+        const md = await renderMarkdownVariantAsync(jsonResponse.clone(), status, baseHeaders).then((r) => r.text());
+        controller.enqueue(new TextEncoder().encode(md));
+        controller.close();
+      },
+    }),
+    { status, headers: markdownHeaders(baseHeaders) },
+  );
+}
+
+function markdownHeaders(base: Headers): Headers {
+  const headers = new Headers(base);
+  headers.set('Content-Type', 'text/markdown; charset=utf-8');
+  return headers;
+}
+
+function renderJsonAsMarkdown(payload: Record<string, unknown>): string {
+  const triad = [
+    `**spec_version:** ${String(payload.spec_version ?? 'unknown')}`,
+    `**checker_url:** ${String(payload.checker_url ?? CHECKER_URL)}`,
+  ];
+  if (payload.error) {
+    const err = payload.error as { code: string; details?: string; cta_text?: string };
+    return [
+      '# anc.dev — score request rejected',
+      '',
+      `**error:** \`${err.code}\``,
+      err.details ? `**details:** ${err.details}` : null,
+      ...triad,
+      '',
+      err.cta_text ?? CTA_INSTALL_ANC,
+      '',
+    ]
+      .filter(Boolean)
+      .join('\n');
+  }
+  const scorecard = payload.scorecard as
+    | { kind?: string; scorecard_url?: string; tool?: { name?: string } }
+    | undefined;
+  if (scorecard?.kind === 'registry_hit') {
+    return [
+      `# anc.dev — ${scorecard.tool?.name ?? 'tool'} (registry hit)`,
+      '',
+      `Scorecard: ${scorecard.scorecard_url}`,
+      ...triad,
+      '',
+    ].join('\n');
+  }
+  return ['# anc.dev — score response', '', '```json', JSON.stringify(payload, null, 2), '```', ''].join('\n');
+}
+
+function isStubError(payload: unknown): boolean {
+  return (
+    typeof payload === 'object' && payload !== null && (payload as { error?: string }).error === 'sandbox_stub_until_u6'
+  );
+}
+
+// ---------------------------------------------------------------------------
+// DO response envelope type guards + error mapping.
+//
+// The DO returns one of two shapes after install + score:
+//   success:  { scorecard: <anc JSON envelope>, anc_version: '0.3.1' }
+//   failure:  { error: '<ScoreErrorCode>', details?: '<string>' }
+//
+// The handler narrows on the envelope shape, then maps DO error codes to
+// user-facing ScoreError variants. Codes the DO knows about but the user
+// envelope doesn't (anc_check_failed, anc_version_unreadable) collapse to
+// incomplete_response_contract so the hard-gate semantics on the
+// response triad hold.
+
+function isDoSuccess(
+  payload: unknown,
+): payload is { scorecard: unknown; anc_version: string; install_ms?: number; anc_check_ms?: number } {
+  if (typeof payload !== 'object' || payload === null) return false;
+  const obj = payload as Record<string, unknown>;
+  return 'scorecard' in obj && typeof obj.anc_version === 'string';
+}
+
+function isDoError(payload: unknown): payload is { error: string; details?: string } {
+  if (typeof payload !== 'object' || payload === null) return false;
+  const obj = payload as Record<string, unknown>;
+  return typeof obj.error === 'string';
+}
+
+// Translate a `resolveSpec()` failure into a shaped ScoreError response.
+// Worker-side resolution can fail in three ways: no spec discoverable
+// (chain_no_resolve), an unsupported PM after fallback (install_unsupported
+// pm=brew_only / pm=go_no_binary), or a branch-shape that bypassed
+// validate.ts somehow (invalid_url_path — defense in depth). The pm
+// extraction here mirrors mapDoError() so the user-facing error envelope
+// shape is identical regardless of which tier produced the bounce.
+function resolutionErrorToResponse(
+  error: 'chain_no_resolve' | 'install_unsupported' | 'invalid_url_path',
+  details?: string,
+): Response {
+  if (error === 'chain_no_resolve') {
+    return shapeScoreError({ code: 'chain_no_resolve', cta_text: CTA_INSTALL_ANC });
+  }
+  if (error === 'invalid_url_path') {
+    return shapeScoreError({
+      code: 'invalid_url_path',
+      cta_text: 'Paste the repo root URL (e.g. https://github.com/owner/repo), not a branch or release link.',
+    });
+  }
+  // install_unsupported — extract pm from `details` (e.g. `pm=brew_only`).
+  // Worker-side resolveSpec only emits brew_only and go_no_binary today;
+  // any other pm collapses to a generic chain_resolved_install_failed so
+  // the user-facing envelope doesn't claim a pm we can't classify.
+  const pm = details?.match(/^pm=(\w+)/)?.[1];
+  if (pm === 'brew_only' || pm === 'brew' || pm === 'bun' || pm === 'go_no_binary') {
+    return shapeScoreError({ code: 'install_unsupported', pm, cta_text: CTA_INSTALL_ANC });
+  }
+  return shapeScoreError({
+    code: 'chain_resolved_install_failed',
+    details: details ?? '',
+    cta_text: CTA_INSTALL_ANC,
+  });
+}
+
+function mapDoError(payload: { error: string; details?: string }): Response {
+  const details = payload.details ?? '';
+  switch (payload.error) {
+    case 'chain_no_resolve':
+      return shapeScoreError({ code: 'chain_no_resolve', cta_text: CTA_INSTALL_ANC });
+    case 'chain_resolved_install_failed':
+      return shapeScoreError({ code: 'chain_resolved_install_failed', details, cta_text: CTA_INSTALL_ANC });
+    case 'chain_resolved_no_binary_produced':
+      return shapeScoreError({ code: 'chain_resolved_no_binary_produced', details, cta_text: CTA_INSTALL_ANC });
+    case 'install_unsupported': {
+      // DO emits details like `pm=brew_only` or `pm=bun`. ScoreError.pm is a
+      // closed union over the PMs the user-facing error envelope knows
+      // about. After the 2026-05-18 rework: 'brew_only' (brew formula
+      // exists but has no alternative PM via the discovery fallback),
+      // 'brew' (legacy code path kept for safety — should be unreachable
+      // post-rework but still maps to a sensible variant if emitted),
+      // and 'bun' (kept for safety; bun is now installable so this
+      // branch should also be unreachable). Any other pm bouncing here
+      // collapses to chain_resolved_install_failed so we don't lie
+      // about which surface is broken.
+      const pm = details.match(/^pm=(\w+)/)?.[1];
+      if (pm === 'brew_only' || pm === 'brew' || pm === 'bun' || pm === 'go_no_binary') {
+        return shapeScoreError({ code: 'install_unsupported', pm, cta_text: CTA_INSTALL_ANC });
+      }
+      return shapeScoreError({ code: 'chain_resolved_install_failed', details, cta_text: CTA_INSTALL_ANC });
+    }
+    case 'timeout':
+      // DO doesn't differentiate install-phase vs score-phase timeout
+      // (the 60 s budget covers both). Defaulting to 'score' matches the
+      // common case: install completes quickly, anc check is the long pole.
+      return shapeScoreError({ code: 'timeout', phase: 'score', cta_text: CTA_INSTALL_ANC });
+    default:
+      // anc_check_failed / anc_version_unreadable / setOutboundHandler
+      // failures land here. If we can't deliver scorecard + anc_version,
+      // surface the contract gap loudly rather than synthesize a partial:
+      // a missing-field response shape would leak into the cache and
+      // poison subsequent reads.
+      return shapeScoreError({
+        code: 'incomplete_response_contract',
+        details: `${payload.error}${details ? `: ${details.slice(0, 160)}` : ''}`,
+        cta_text: CTA_INSTALL_ANC,
+      });
+  }
+}
+
+function validationErrorFor(
+  code: ValidatedInput & { kind: 'unknown' } extends infer T ? (T extends { error: infer E } ? E : never) : never,
+  raw: string,
+): ScoreError {
+  switch (code) {
+    case 'invalid_url':
+      return { code: 'invalid_url', details: raw.slice(0, 200), cta_text: CTA_INSTALL_ANC };
+    case 'non_https_url':
+      return { code: 'non_https_url', cta_text: 'Use https:// — http:// is not allowed.' };
+    case 'non_github_host':
+      return { code: 'non_github_host', cta_text: 'anc.dev only scores public GitHub repos.' };
+    case 'invalid_url_path':
+      return {
+        code: 'invalid_url_path',
+        cta_text: 'Paste the repo root URL (e.g. https://github.com/owner/repo), not a branch or release link.',
+      };
+    case 'unparseable_install_command':
+      return {
+        code: 'unparseable_install_command',
+        details: raw.slice(0, 200),
+        cta_text: CTA_INSTALL_ANC,
+      };
+    default:
+      return { code: 'unrecognized_input', cta_text: CTA_INSTALL_ANC };
+  }
+}
+
+function serviceMisconfigured(err: unknown): Response {
+  const details = err instanceof Error ? err.message : String(err);
+  return shapeScoreError({ code: 'service_misconfigured', details, cta_text: CTA_INSTALL_ANC });
+}
+
+async function sha256(input: string): Promise<string> {
+  const bytes = new TextEncoder().encode(input);
+  const digest = await crypto.subtle.digest('SHA-256', bytes);
+  return [...new Uint8Array(digest)].map((b) => b.toString(16).padStart(2, '0')).join('');
+}
+
+/**
+ * Build the shareable HTML URL for an inline-scorecard response. Reads the
+ * cache-tier binary derivation from registry-lookup so the share URL and
+ * the cache key the DO writes to stay in lockstep. The `/score/live/`
+ * prefix nests under the existing `/score/<tool>` curated namespace; the
+ * string "live" is reserved in the registry (scorecards.mjs) so no
+ * curated tool can collide.
+ *
+ * Returns null when the binary isn't derivable upfront (github-url without
+ * a hint). In that case the JSON response ships without `share_url`; the
+ * user still has the scorecard inline and can re-paste to re-score.
+ */
+function shareUrlForInput(input: ValidatedInput, hintsIndex: DiscoveryHintsIndex): string | null {
+  const binary = deriveShareBinary(input, hintsIndex);
+  return binary ? `/score/live/${binary}` : null;
+}
+
+// Statically referenced so `_unused` linters see these as live exports —
+// the type-narrowing utility for the validation switch.
+void statusForError;
+void SPEC_VERSION;
+void TurnstileConfigError;
diff --git a/src/worker/score/kill-switch.ts b/src/worker/score/kill-switch.ts
new file mode 100644
index 0000000..6ec3134
--- /dev/null
+++ b/src/worker/score/kill-switch.ts
@@ -0,0 +1,38 @@
+// `scoring_disabled` operator kill switch.
+//
+// Plan U5 (docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md
+// "Cost ceiling and abuse mitigation" step 3): the Worker reads
+// `env.SCORE_KV.get("scoring_disabled")` first thing in /api/score.
+// Truthy → 503 with Retry-After: 3600. Operator flips via
+// `wrangler kv:key put SCORE_KV scoring_disabled true` in seconds.
+//
+// In-memory cache for the lifetime of a single Worker invocation only.
+// Workers isolates are short-lived and re-instantiate frequently, so a
+// process-lifetime cache is enough to coalesce many concurrent requests
+// against the same invocation without making the kill-switch sticky
+// across the operator's flip. A flip propagates to all isolates within
+// the global KV-read TTL (≤60 s).
+
+export type KillSwitchEnv = {
+  SCORE_KV: KVNamespace;
+};
+
+const CACHE_TTL_MS = 30_000;
+
+type CacheEntry = { value: boolean; expiresAt: number };
+let cache: CacheEntry | null = null;
+
+export async function isScoringDisabled(env: KillSwitchEnv): Promise<boolean> {
+  const now = Date.now();
+  if (cache && cache.expiresAt > now) return cache.value;
+
+  const raw = await env.SCORE_KV.get('scoring_disabled');
+  const value = raw === 'true' || raw === '1';
+  cache = { value, expiresAt: now + CACHE_TTL_MS };
+  return value;
+}
+
+/** Test-only — drops the cache so a unit test's stub KV is read on the next call. */
+export function _resetKillSwitchCache(): void {
+  cache = null;
+}
diff --git a/src/worker/score/parse-install.ts b/src/worker/score/parse-install.ts
index a934cdb..b941016 100644
--- a/src/worker/score/parse-install.ts
+++ b/src/worker/score/parse-install.ts
@@ -7,7 +7,7 @@
 // Inputs that don't match any row return `unparseable_install_command`.
 // Test-first per the plan's Execution note: the test suite IS the spec.
 
-export type PM = 'brew' | 'cargo-binstall' | 'bun' | 'pip' | 'npm' | 'go';
+export type PM = 'brew' | 'cargo-binstall' | 'bun' | 'pip' | 'uv' | 'npm' | 'go';
 
 export type ParsedInstall = {
   pm: PM;
@@ -72,10 +72,17 @@ export function parseInstallCommand(raw: string): ParseResult {
     }
     case 'uv': {
       // uv tool install <pkg>
+      //
+      // Split from pm=pip in the 2026-05-18 U6 rework: the sandbox image
+      // now ships native uv (pinned tarball + sha256), so uv-shape inputs
+      // run through `uv tool install <pkg>` end-to-end rather than being
+      // silently downgraded to `pip install <pkg>`. The resolver and
+      // wheel-fetch paths differ enough that conflating them masked the
+      // pip metadata 403 (Bug M) that uv does not exhibit.
       if (tokens[1] !== 'tool' || tokens[2] !== 'install') return FAIL;
       const pkg = firstPositional(tokens, 3);
       if (!pkg) return FAIL;
-      return { ok: true, value: { pm: 'pip', package: pkg, binary: pkg } };
+      return { ok: true, value: { pm: 'uv', package: pkg, binary: pkg } };
     }
     case 'pip':
     case 'pip3':
diff --git a/src/worker/score/registry-lookup.ts b/src/worker/score/registry-lookup.ts
index a66598a..0157947 100644
--- a/src/worker/score/registry-lookup.ts
+++ b/src/worker/score/registry-lookup.ts
@@ -1,8 +1,5 @@
 // Registry + discovery-hints hit-test for the live-scoring path.
 //
-// Plan U4 (docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md,
-// "registry-lookup.ts" bullet at the end of the U4 Approach block).
-//
 // Order matters: registry-fast-path > hint > miss. Committed scorecards
 // always win over hints (avoids drift); hints always win over live
 // discovery (we curated them because live discovery was wrong).
@@ -10,7 +7,17 @@
 // Lookup is case-insensitive on owner/repo because GitHub URLs are
 // case-preserving but case-insensitive at resolution. A user pasting
 // `github.com/aider-ai/aider` should hit the `Aider-AI/aider` hint.
+//
+// `lookupScorecard()` is the async unified resolution that consults
+// registry first and then falls through to the R2 cache when the binary
+// is cheaply derivable. Both `curated` and `cached` results bypass the
+// metered gates (Turnstile, rate-limit, DO) — cached scorecards
+// are functionally identical to curated ones (no sandbox cost). The sync
+// `lookupRegistry()` stays exported for callers that don't need the
+// cache layer (registry-lookup tests, future callers that want just the
+// registry tier).
 
+import * as cache from './cache';
 import type { ParsedInstall } from './parse-install';
 import type { ValidatedInput } from './validate';
 
@@ -20,6 +27,19 @@ export type RegistryEntry = {
   install: string;
   audit_profile?: string;
   repo?: string;
+  // Present when the tool has a committed scorecard. The Worker uses
+  // these to build the spec_version + anc_version + checker_url triad
+  // and route to /score/<slug> without fetching the scorecard JSON.
+  // Tools without a scorecard ship the
+  // metadata-only entry; the registry-fast-path treats them as a miss.
+  version?: string;
+  anc_version?: string;
+  scorecard_url?: string;
+  // score_pct surfaces into the registry_hit envelope so the homepage
+  // form can show a curated-tool reward (e.g., "Curated · 92% pass rate
+  // · Opening the audited scorecard…") inline before redirect, without
+  // a second round-trip to fetch the scorecard JSON.
+  score_pct?: number;
 };
 
 export type RegistryIndex = {
@@ -67,8 +87,141 @@ export function lookupRegistry(
     if (hint) return { kind: 'hint', hint };
     return { kind: 'miss' };
   }
-  // install-command and unknown don't trigger lookups; the caller passes
-  // them through directly (install-command -> U6 with the parsed spec;
-  // unknown -> 400 to user).
+  if (input.kind === 'install-command') {
+    // Cross-check the parser's binary against curated by_slug. Catches
+    // inputs like `cargo install bat` (binary='bat', curated as
+    // by_slug['bat']) and `npm i -g typescript` (binary='typescript',
+    // curated as by_slug['typescript']). Without this, install-commands
+    // that resolve to a curated tool fall through to the R2 cache (empty
+    // on first request) and then to the live path — paying sandbox cost
+    // for a tool the site already has a curated audit for. Per-binary
+    // alias edge case (e.g., `cargo install rg` typing the binary name
+    // not the package name) still falls through; an explicit by_binary
+    // map would catch that but isn't worth the index churn for the
+    // current corpus.
+    const entry = registryIndex.by_slug[input.spec.binary];
+    if (entry) return { kind: 'registry', entry };
+    return { kind: 'miss' };
+  }
+  // unknown — passed through to a 400 by the caller.
+  return { kind: 'miss' };
+}
+
+// ---------------------------------------------------------------------------
+// Unified scorecard lookup
+// ---------------------------------------------------------------------------
+
+// Resolution covers BOTH the curated registry tier (in-memory hashmap,
+// no I/O) and the R2 cache tier (one R2 GET on hit, cheap). Resolution
+// order:
+//
+//   1. Registry first. Slug or github-url with a curated entry whose
+//      scorecard_url+anc_version are populated → `curated`. Done.
+//   2. R2 cache fallback when the binary is cheaply known:
+//      - install-command: `spec.binary` from the parser
+//      - github-url with a hint: `hint.binary`
+//      - github-url without hint: skipped (no binary derivable upfront;
+//        discovery is part of the live path)
+//      - slug-without-curated-scorecard: skipped (slugs without a
+//        scorecard_url have no install spec to derive a binary from)
+//   3. `miss` otherwise. The handler proceeds to the metered live path.
+//
+// `cached` results carry the cached payload's anc_version (NOT the
+// build-time SPEC_VERSION constant used to build the lookup key), so the
+// response triad reflects which anc the scorecard was actually scored by.
+//
+// `skipCache` short-circuits the R2 read tier — registry still consults
+// freely. Callers pass `skipCache: true` to honor the `?fromCache=false`
+// operator escape hatch ("did the registry version just update?").
+
+export type ScorecardLookupResult =
+  | { kind: 'curated'; entry: RegistryEntry; scorecard_url: string; anc_version: string }
+  | { kind: 'cached'; scorecard: unknown; anc_version: string; tool_version: string }
+  | { kind: 'miss' };
+
+export type ScorecardLookupOptions = {
+  // Build-time spec version, used as the partition slot in the cache key.
+  // All readers and writers must pass the same value to avoid key drift.
+  specVersion: string;
+  // When true, skip the R2 read tier. Registry is still consulted.
+  skipCache?: boolean;
+};
+
+export async function lookupScorecard(
+  input: ValidatedInput,
+  env: cache.CacheEnv,
+  registryIndex: RegistryIndex,
+  hintsIndex: DiscoveryHintsIndex,
+  opts: ScorecardLookupOptions,
+): Promise<ScorecardLookupResult> {
+  // Tier 1: registry. Curated scorecards always win over the cache.
+  const registry = lookupRegistry(input, registryIndex, hintsIndex);
+  if (registry.kind === 'registry' && registry.entry.scorecard_url && registry.entry.anc_version) {
+    return {
+      kind: 'curated',
+      entry: registry.entry,
+      scorecard_url: registry.entry.scorecard_url,
+      anc_version: registry.entry.anc_version,
+    };
+  }
+
+  // Tier 2: R2 cache. Derive the binary from whatever is cheaply
+  // available; bail out otherwise (no I/O speculation).
+  if (opts.skipCache) return { kind: 'miss' };
+
+  const binary = deriveCacheBinary(input, registry);
+  if (!binary) return { kind: 'miss' };
+
+  const cached = await cache.get(env, cache.keyFor(binary, opts.specVersion));
+  if (cached) {
+    return {
+      kind: 'cached',
+      scorecard: cached.scorecard,
+      anc_version: cached.anc_version,
+      tool_version: cached.tool_version,
+    };
+  }
+
   return { kind: 'miss' };
 }
+
+// Returns the binary slug usable as a cache key, or null when the input
+// can't be resolved without running discovery. Lifted out of
+// lookupScorecard so the derivation is independently testable and so the
+// "where does the binary come from?" decision lives in one place.
+function deriveCacheBinary(input: ValidatedInput, registry: RegistryLookupResult): string | null {
+  if (input.kind === 'install-command') return input.spec.binary;
+  if (registry.kind === 'hint') return registry.hint.binary;
+  // github-url without a hint, or slug without a curated scorecard:
+  // no upfront binary. The live path will run discovery and write to
+  // the cache afterward, so the NEXT request benefits.
+  return null;
+}
+
+/**
+ * Public form of the cache-key binary derivation, used by the handler to
+ * compute the `share_url` (`/live-score/<binary>`) for cached + live
+ * inline-scorecard responses. Same logic as the internal cache-tier
+ * derivation, exported so the handler can reuse it without re-running a
+ * full lookup. Returns null when no binary is derivable upfront (the only
+ * case is github-url without a hint; the user's response carries no
+ * share_url and they can re-paste to re-score).
+ */
+export function deriveShareBinary(input: ValidatedInput, hintsIndex: DiscoveryHintsIndex): string | null {
+  if (input.kind === 'install-command') return input.spec.binary;
+  if (input.kind === 'github-url') {
+    // Branch-scoped pastes don't get a share URL. The /score/live/<binary>
+    // surface is keyed by binary alone; reusing it for a branch-scoped
+    // score would clobber the default-branch scorecard. The user still
+    // gets the scorecard inline in the response — they just can't bookmark
+    // it. A branch-aware share URL is a future enhancement.
+    if (input.branch) return null;
+    const key = `${input.owner}/${input.repo}`;
+    const hint = lookupOwnerRepo(hintsIndex.by_owner_repo, key);
+    return hint?.binary ?? null;
+  }
+  // slug: registry-fast-path catches curated slugs into the `registry_hit`
+  // branch (which uses scorecard_url, not share_url). A slug without a
+  // curated scorecard isn't valid input — validateInput rejects it.
+  return null;
+}
diff --git a/src/worker/score/resolve-spec.ts b/src/worker/score/resolve-spec.ts
new file mode 100644
index 0000000..1fad1d1
--- /dev/null
+++ b/src/worker/score/resolve-spec.ts
@@ -0,0 +1,283 @@
+// Resolution layer: turn a `ValidatedInput` into an `InstallSpec` the
+// sandbox can act on. Lives in the Worker tier (NOT the DO) so that
+// requests which fail to resolve a spec (`chain_no_resolve`) bounce
+// without spinning up a container — same answer, no DO compute billed.
+// Pre-2026-05-20 this lived inside the DO's `resolveSpec()`; the move
+// keeps the DO's surface tightly scoped to "given a spec, install +
+// score" and collapses the duplicate `loadHintsIndex` that used to fan
+// out across both tiers.
+//
+// What this module owns:
+//
+//   - Install-command inputs with pm=brew → `resolveBrewFallback`:
+//     fetch formula metadata, find the GitHub homepage, hand off to
+//     `discoverBinary`, accept any non-brew resolution. Linuxbrew on
+//     the sandbox image is too slow for the 60 s budget; treating
+//     `brew install <pkg>` as a hint for "find me an alternative PM"
+//     is the workaround the 2026-05-18 image rework formalized.
+//   - Install-command inputs with pm=go → `resolveGoFallback`: the
+//     parallel rework for `go install <module>@latest`. The sandbox
+//     ships no Go toolchain by design (binary-only premise), so a Go
+//     module path that resolves to a GitHub repo gets redirected
+//     through the discovery chain in search of a release binary.
+//   - GitHub-URL inputs WITHOUT a branch → run the full discovery chain.
+//   - GitHub-URL inputs WITH a branch → bypass discovery (release
+//     artifacts aren't the right scoring target for an arbitrary ref)
+//     and synthesize a `git-clone` spec. Branch name re-validated here
+//     even though validate.ts already did so at the Worker boundary —
+//     defense in depth so a future caller that bypasses validate.ts
+//     can't smuggle shell metacharacters through.
+//   - install-command inputs for any other PM → pass-through.
+//   - slug inputs that didn't hit the registry tier → `chain_no_resolve`
+//     (live-scoring bare slugs is deferred).
+//
+// Trust boundary: this module produces an `InstallSpec`. The DO's
+// sandbox-exec layer shell-quotes every value it interpolates from the
+// spec, so the move from "DO does discovery" to "Worker does discovery"
+// doesn't change the input-sanitization story. The user-pasted string
+// is still validated by validate.ts at the Worker boundary; what flows
+// across the DO request boundary now is a typed, narrowed InstallSpec
+// rather than a raw `ValidatedInput`.
+
+import { discoverBinary, type InstallSpec, type ResolvedStep } from './discover-binary';
+import type { DiscoveryHintsIndex } from './registry-lookup';
+import { type ValidatedInput, validBranchName } from './validate';
+
+// `resolved_step` is populated when the discovery chain or one of its
+// fallbacks ran; absent for paths that never touch discoverBinary
+// (install-command non-brew/go, branch-scoped git-clone, registry slug
+// miss). Handler.ts threads it into the AE telemetry blob5 so analytics
+// queries can attribute live traffic to specific discovery tiers.
+export type ResolveResult =
+  | { ok: true; spec: InstallSpec; resolved_step?: ResolvedStep }
+  | { ok: false; error: 'chain_no_resolve' | 'install_unsupported' | 'invalid_url_path'; details?: string };
+
+export type BrewFallbackResult =
+  | { ok: true; value: InstallSpec; resolved_step?: ResolvedStep }
+  | { ok: false; error: 'install_unsupported'; details: 'pm=brew_only' };
+
+export type GoFallbackResult =
+  | { ok: true; value: InstallSpec; resolved_step?: ResolvedStep }
+  | { ok: false; error: 'install_unsupported'; details: 'pm=go_no_binary' };
+
+export type ResolveOptions = {
+  // Injectable for tests; defaults to globalThis.fetch. Threaded through
+  // the brew/go fallbacks and the discovery chain so a single override
+  // covers every outbound call this module makes.
+  fetcher?: typeof fetch;
+};
+
+/**
+ * Resolve a validated user input into an InstallSpec. The Worker calls
+ * this AFTER the cache + accessibility tiers; the DO never sees a
+ * `ValidatedInput` after the 2026-05-20 move, only the InstallSpec
+ * produced here.
+ */
+export async function resolveSpec(
+  input: ValidatedInput,
+  hintsIndex: DiscoveryHintsIndex,
+  opts: ResolveOptions = {},
+): Promise<ResolveResult> {
+  if (input.kind === 'install-command') {
+    if (input.spec.pm === 'brew') {
+      const result = await resolveBrewFallback(input.spec.package, hintsIndex, opts.fetcher);
+      return result.ok
+        ? { ok: true, spec: result.value, resolved_step: result.resolved_step }
+        : { ok: false, error: result.error, details: result.details };
+    }
+    if (input.spec.pm === 'go') {
+      const result = await resolveGoFallback(input.spec.package, hintsIndex, opts.fetcher);
+      return result.ok
+        ? { ok: true, spec: result.value, resolved_step: result.resolved_step }
+        : { ok: false, error: result.error, details: result.details };
+    }
+    return { ok: true, spec: input.spec };
+  }
+  if (input.kind === 'github-url') {
+    // Branch-scoped paste: skip discovery entirely. Release artifacts
+    // are scored against a release, not against an arbitrary ref, so a
+    // branch-scoped paste needs the source at THAT branch. validBranchName
+    // is checked at validate.ts at the Worker boundary; the re-check
+    // here is defense in depth for any future caller that constructs a
+    // github-url ValidatedInput directly without re-running validate.ts.
+    if (typeof input.branch === 'string') {
+      if (!validBranchName(input.branch)) {
+        return { ok: false, error: 'invalid_url_path' };
+      }
+      const spec: InstallSpec = {
+        pm: 'git-clone',
+        owner: input.owner,
+        repo: input.repo,
+        branch: input.branch,
+        binary: input.repo,
+      };
+      return { ok: true, spec };
+    }
+    const result = await discoverBinary({
+      owner: input.owner,
+      repo: input.repo,
+      hintsIndex,
+      fetcher: opts.fetcher,
+    });
+    if (result.ok) return { ok: true, spec: result.spec, resolved_step: result.resolved_step };
+    return { ok: false, error: result.error };
+  }
+  // slug input that didn't hit the registry tier: we don't live-score
+  // bare slugs (deferred). Same error code GET requests use so the
+  // front-end renders the same CTA panel.
+  return { ok: false, error: 'chain_no_resolve' };
+}
+
+// ---------------------------------------------------------------------------
+// Brew discovery-fallback
+//
+// `brew install <pkg>` user input is translated to an alternative PM
+// via the discovery chain. brew_only bounces happen when:
+//   - the formula isn't on formulae.brew.sh (404 or fetch error), OR
+//   - the formula's homepage isn't a github.com URL, OR
+//   - the discovery chain misses every distribution OR loops back to
+//     brew (the chain's brew-last priority should prevent the loop,
+//     but the guard catches a regression there).
+//
+// Fetcher injection lets tests pin behavior without touching
+// globalThis.fetch.
+// ---------------------------------------------------------------------------
+
+export async function resolveBrewFallback(
+  pkg: string,
+  hintsIndex: DiscoveryHintsIndex,
+  fetcher: typeof fetch = globalThis.fetch.bind(globalThis),
+): Promise<BrewFallbackResult> {
+  const formula = await fetchBrewFormula(pkg, fetcher);
+  if (!formula) {
+    return { ok: false, error: 'install_unsupported', details: 'pm=brew_only' };
+  }
+  const ownerRepo = parseGithubOwnerRepo(formula.homepage);
+  if (!ownerRepo) {
+    return { ok: false, error: 'install_unsupported', details: 'pm=brew_only' };
+  }
+  const result = await discoverBinary({
+    owner: ownerRepo.owner,
+    repo: ownerRepo.repo,
+    hintsIndex,
+    fetcher,
+  });
+  if (result.ok && result.spec.pm !== 'brew') {
+    return { ok: true, value: result.spec, resolved_step: result.resolved_step };
+  }
+  return { ok: false, error: 'install_unsupported', details: 'pm=brew_only' };
+}
+
+// ---------------------------------------------------------------------------
+// Go discovery-fallback
+//
+// `go install <module>@latest` is source-compilation by design — Go
+// modules don't ship binaries. Running it on the sandbox would either
+// require a Go toolchain capable of compiling within the 60 s budget
+// (impossible on CF Containers basic) OR violate the binary-only
+// premise that the rest of the sandbox install path assumes. We redirect
+// through the
+// discovery chain: a module path of the form
+// `github.com/<owner>/<repo>/...` is treated as a GitHub-URL input,
+// and discoverBinary picks the GitHub Releases asset (Step 2) for
+// tools that ship binaries (glow, lazygit, gh, fzf, etc.). Modules
+// outside github.com OR github.com repos without release binaries
+// bounce as install_unsupported pm=go_no_binary — fast-fail UX rather
+// than a long compile that times out.
+// ---------------------------------------------------------------------------
+
+export async function resolveGoFallback(
+  modulePath: string,
+  hintsIndex: DiscoveryHintsIndex,
+  fetcher: typeof fetch = globalThis.fetch.bind(globalThis),
+): Promise<GoFallbackResult> {
+  const ownerRepo = parseGoModuleOwnerRepo(modulePath);
+  if (!ownerRepo) {
+    return { ok: false, error: 'install_unsupported', details: 'pm=go_no_binary' };
+  }
+  const result = await discoverBinary({
+    owner: ownerRepo.owner,
+    repo: ownerRepo.repo,
+    hintsIndex,
+    fetcher,
+  });
+  // Only accept a `direct` resolution (Step 2 GitHub Releases asset)
+  // or a non-go cross-PM resolution. If discovery looped back to
+  // `go` somehow (shouldn't — Step 3 picks brew last among PMs,
+  // and Step 4 README parse won't return pm=go for a `go install`
+  // input), bounce honestly to avoid infinite indirection.
+  if (result.ok && result.spec.pm !== 'go') {
+    return { ok: true, value: result.spec, resolved_step: result.resolved_step };
+  }
+  return { ok: false, error: 'install_unsupported', details: 'pm=go_no_binary' };
+}
+
+// Parse a Go module path of the form `github.com/<owner>/<repo>[/...]`
+// into { owner, repo }. Subpath segments (e.g. `cmd/humanize`) are
+// stripped — the GitHub release for the repo applies, regardless of
+// which subpackage the module declares. Returns null for non-github
+// module paths (rsc.io/quote, golang.org/x/..., etc.) — those have no
+// GitHub release equivalent and bounce as go_no_binary.
+function parseGoModuleOwnerRepo(modulePath: string): { owner: string; repo: string } | null {
+  // Strip any @ version suffix the parser might have left in place,
+  // defensively (parse-install already does this, but the fallback
+  // shouldn't depend on the caller's hygiene).
+  const cleaned = modulePath.split('@')[0];
+  const segments = cleaned.split('/').filter(Boolean);
+  if (segments.length < 3) return null;
+  if (segments[0] !== 'github.com') return null;
+  const owner = segments[1];
+  const repo = segments[2];
+  if (!owner || !repo) return null;
+  return { owner, repo };
+}
+
+// ---------------------------------------------------------------------------
+// Brew formula fetcher (discovery-fallback support)
+// ---------------------------------------------------------------------------
+
+type BrewFormulaShape = {
+  homepage?: string;
+};
+
+// Short 2 s timeout: discovery already runs against 5+ registries with
+// their own deadlines; stacking another long timeout here would hurt
+// the worst-case latency more than the bounce itself.
+async function fetchBrewFormula(pkg: string, fetcher: typeof fetch): Promise<BrewFormulaShape | null> {
+  const url = `https://formulae.brew.sh/api/formula/${encodeURIComponent(pkg.toLowerCase())}.json`;
+  const ctrl = new AbortController();
+  const t = setTimeout(() => ctrl.abort(), 2_000);
+  try {
+    const res = await fetcher(url, {
+      signal: ctrl.signal,
+      headers: { 'User-Agent': 'anc-discovery/1.0 (+https://anc.dev)' },
+    });
+    if (!res.ok) return null;
+    const data = (await res.json()) as BrewFormulaShape;
+    return data ?? null;
+  } catch {
+    return null;
+  } finally {
+    clearTimeout(t);
+  }
+}
+
+// Mirrors validate.ts's GITHUB_URL_RE shape so the same repo-root
+// constraints apply — `tree/branch` paths in a formula's homepage
+// field don't drift into resolveSpec.
+export function parseGithubOwnerRepo(url: string | undefined): { owner: string; repo: string } | null {
+  if (!url) return null;
+  let parsed: URL;
+  try {
+    parsed = new URL(url);
+  } catch {
+    return null;
+  }
+  if (parsed.hostname !== 'github.com') return null;
+  const segments = parsed.pathname.split('/').filter(Boolean);
+  if (segments.length < 2) return null;
+  const owner = segments[0];
+  const repo = segments[1].replace(/\.git$/, '');
+  if (!owner || !repo) return null;
+  return { owner, repo };
+}
diff --git a/src/worker/score/response-shape.ts b/src/worker/score/response-shape.ts
new file mode 100644
index 0000000..4d3d035
--- /dev/null
+++ b/src/worker/score/response-shape.ts
@@ -0,0 +1,197 @@
+// Response-shape module for /api/score — single source of truth for the
+// success envelope, the error envelope, and the ScoreError discriminated
+// union every score-pipeline module imports.
+//
+// Every /api/score response carries the triad spec_version + anc_version +
+// checker_url. Missing any of the three is a hard 500, NOT a quiet
+// omission. The check fires at response-build time so a partial response
+// can never escape the Worker.
+//
+// The ScoreError union routes every error through one wire shape;
+// assertNever() makes adding a new variant a compile error everywhere it
+// is consumed (handler.ts maps each variant to an HTTP status), so a new
+// variant cannot silently fall through with no status mapping.
+//
+// The exec-time fields are split by source:
+//   - SPEC_VERSION / SITE_SPEC_VERSION come from build-emitted constants
+//     (spec-version.gen.ts).
+//   - ANC_VERSION comes from the running sandbox at exec time and is
+//     persisted into the cache payload; cached responses read it from the
+//     payload, NOT from a build-time constant — otherwise a re-deployed
+//     site with a stale cache would lie about which anc actually scored
+//     the artifact.
+//   - CHECKER_URL is a build-time constant pointing at the production
+//     surface; if anc.dev ever moves, the constant moves with it.
+
+import { CHECKER_URL, SITE_SPEC_VERSION, SPEC_VERSION } from '../spec-version.gen';
+
+export type ScoreError =
+  | { code: 'invalid_url'; details: string; cta_text: string }
+  | { code: 'non_https_url'; cta_text: string }
+  | { code: 'non_github_host'; cta_text: string }
+  | { code: 'invalid_url_path'; cta_text: string }
+  | { code: 'unrecognized_input'; cta_text: string }
+  | { code: 'unparseable_install_command'; details: string; cta_text: string }
+  | { code: 'chain_no_resolve'; cta_text: string }
+  | { code: 'github_repo_not_accessible'; cta_text: string }
+  | { code: 'discovery_redirect_loop'; cta_text: string }
+  | { code: 'rate_limited'; retry_after: number; cta_text: string }
+  | { code: 'install_unsupported'; pm: 'brew' | 'brew_only' | 'bun' | 'go_no_binary'; cta_text: string }
+  | { code: 'chain_resolved_install_failed'; details: string; cta_text: string }
+  | { code: 'chain_resolved_no_binary_produced'; details: string; cta_text: string }
+  | { code: 'timeout'; phase: 'install' | 'score'; cta_text: string }
+  | { code: 'turnstile_failed'; cta_text: string }
+  | { code: 'scoring_disabled'; cta_text: string }
+  | { code: 'sandbox_stub_until_u6'; cta_text: string }
+  | { code: 'sandbox_unavailable'; cta_text: string }
+  | { code: 'incomplete_response_contract'; details: string; cta_text: string }
+  | { code: 'service_misconfigured'; details: string; cta_text: string };
+
+export type ScoreErrorResponse = {
+  error: ScoreError;
+  spec_version: string;
+  checker_url: string;
+};
+
+export type ScoreSuccess = {
+  scorecard: unknown;
+  spec_version: string;
+  site_spec_version: string;
+  anc_version: string;
+  checker_url: string;
+  // Set for inline scorecards (cached + live branches) when the binary is
+  // derivable from the input. The homepage form's JS redirects here after
+  // a successful submit. URL shape `/live-score/<binary>` reads from the
+  // R2 cache that the DO + cached lookups write to; one write, one share
+  // surface. Absent for:
+  //   - `registry_hit` responses (carry their own `scorecard_url` pointing
+  //     at the curated static page)
+  //   - github-url-without-hint live runs (binary not derivable in the
+  //     handler before the DO discovery; rare in practice — Aider-AI/aider
+  //     etc. all ship hints)
+  share_url?: string;
+};
+
+const CTA_INSTALL_ANC = 'Install `anc` and run `anc check .` in your project for full depth.';
+
+/** Compile-time exhaustiveness guard. Reaching this at runtime is a bug. */
+export function assertNever(value: never): never {
+  throw new Error(`Unhandled ScoreError variant: ${JSON.stringify(value)}`);
+}
+
+/** HTTP status for each ScoreError variant. Centralised so handler.ts cannot drift. */
+export function statusForError(error: ScoreError): number {
+  switch (error.code) {
+    case 'invalid_url':
+    case 'non_https_url':
+    case 'non_github_host':
+    case 'invalid_url_path':
+    case 'unrecognized_input':
+    case 'unparseable_install_command':
+    case 'turnstile_failed':
+      return 400;
+    case 'chain_no_resolve':
+    case 'github_repo_not_accessible':
+      return 404;
+    case 'rate_limited':
+      return 429;
+    case 'install_unsupported':
+    case 'chain_resolved_install_failed':
+    case 'chain_resolved_no_binary_produced':
+      return 502;
+    case 'timeout':
+      return 504;
+    case 'discovery_redirect_loop':
+      return 502;
+    case 'scoring_disabled':
+    case 'sandbox_stub_until_u6':
+    case 'sandbox_unavailable':
+      return 503;
+    case 'incomplete_response_contract':
+    case 'service_misconfigured':
+      return 500;
+    default:
+      return assertNever(error);
+  }
+}
+
+const JSON_HEADERS_LIVE = {
+  'Content-Type': 'application/json; charset=utf-8',
+  'Access-Control-Allow-Origin': '*',
+  'X-Robots-Tag': 'noindex',
+  'Cache-Control': 'no-store',
+} as const;
+
+const JSON_HEADERS_CACHE_HIT = {
+  'Content-Type': 'application/json; charset=utf-8',
+  'Access-Control-Allow-Origin': '*',
+  'X-Robots-Tag': 'noindex',
+  'Cache-Control': 'public, max-age=300',
+} as const;
+
+export type ResponseFreshness = 'live' | 'cache-hit';
+
+/**
+ * Build a successful score response. The response triad is asserted
+ * inline — a payload missing spec_version / anc_version / checker_url
+ * returns 500 with `incomplete_response_contract` so the contract
+ * violation is loud, not a silent partial.
+ */
+export function shapeScoreSuccess(
+  scorecard: unknown,
+  anc_version: string | null | undefined,
+  freshness: ResponseFreshness,
+  shareUrl?: string | null,
+): Response {
+  if (!anc_version) {
+    return shapeScoreError(
+      {
+        code: 'incomplete_response_contract',
+        details: 'anc_version missing — refusing to emit a partial response',
+        cta_text: CTA_INSTALL_ANC,
+      },
+      'live',
+    );
+  }
+
+  const body: ScoreSuccess = {
+    scorecard,
+    spec_version: SPEC_VERSION,
+    site_spec_version: SITE_SPEC_VERSION,
+    anc_version,
+    checker_url: CHECKER_URL,
+    ...(shareUrl ? { share_url: shareUrl } : {}),
+  };
+
+  const headers = freshness === 'cache-hit' ? JSON_HEADERS_CACHE_HIT : JSON_HEADERS_LIVE;
+  return new Response(JSON.stringify(body), { status: 200, headers });
+}
+
+/**
+ * Build an error response carrying the response triad on every error too.
+ * `retry_after` from `rate_limited` is mirrored onto the `Retry-After`
+ * HTTP header so well-behaved clients back off automatically.
+ */
+export function shapeScoreError(error: ScoreError, freshness: ResponseFreshness = 'live'): Response {
+  const body: ScoreErrorResponse = {
+    error,
+    spec_version: SPEC_VERSION,
+    checker_url: CHECKER_URL,
+  };
+
+  const headers = new Headers(freshness === 'cache-hit' ? JSON_HEADERS_CACHE_HIT : JSON_HEADERS_LIVE);
+  if (error.code === 'rate_limited') {
+    headers.set('Retry-After', String(error.retry_after));
+  } else if (error.code === 'scoring_disabled') {
+    headers.set('Retry-After', '3600');
+  }
+
+  return new Response(JSON.stringify(body), {
+    status: statusForError(error),
+    headers,
+  });
+}
+
+export const CTA = {
+  installAnc: CTA_INSTALL_ANC,
+} as const;
diff --git a/src/worker/score/sandbox-exec.ts b/src/worker/score/sandbox-exec.ts
new file mode 100644
index 0000000..67acac5
--- /dev/null
+++ b/src/worker/score/sandbox-exec.ts
@@ -0,0 +1,751 @@
+// Live-scoring orchestration — install + anc check inside a Sandbox DO,
+// with two-phase egress enforced via the SDK's named outbound handlers.
+// The DO class in ./do.ts holds the static `outboundHandlers` map; this
+// module orchestrates the per-request install + score flow by calling
+// `setOutboundHandler` and `exec` against the DO instance it's passed.
+//
+// Pure orchestration — no SDK class imports beyond a type-only reference
+// for the parameter type. Lets `tests/score-do.test.ts` exercise the
+// two-phase ordering invariant against a hand-rolled Container-like
+// stub without instantiating the real Sandbox class.
+//
+// Per-PM install command table mirrors a per-package-manager script-
+// execution audit: `npm` and `bun` carry `--ignore-scripts`; `pip`
+// carries `--only-binary=:all:`; `cargo binstall` is binary-only by
+// design; `uv tool install` uses uv's own resolver (binary-only by
+// default for wheel-bearing packages). `brew` returns null from
+// installCommandFor() so the resolveSpec() discovery-fallback in resolve-spec.ts
+// (2026-05-18 rework) can translate `brew install <tool>` inputs to
+// whatever cargo / npm / pip / go alternative the discovery chain finds
+// for the brew formula's GitHub repo. brew-only tools (no other PM)
+// bounce as install_unsupported with pm=brew_only.
+
+import type { Sandbox } from '@cloudflare/sandbox';
+import type { GitCloneInstall, InstallSpec } from './discover-binary';
+import { SDIST_TRUSTED_NAMES } from './sdist-allowlist';
+import { validBranchName } from './validate';
+
+// Per-clone destination — fixed name keeps the path predictable for the
+// `anc check <path>` invocation and the cleanup post-score (the warm
+// container session may reuse this DO instance for the next request).
+// Lives under /tmp so it's wiped by the container's tmpfs semantics.
+const CLONE_DEST = '/tmp/anc-clone-target';
+
+// ---------------------------------------------------------------------------
+// Result + error types
+// ---------------------------------------------------------------------------
+
+export type ScoreSuccess = {
+  ok: true;
+  value: {
+    scorecard: unknown;
+    anc_version: string;
+    // Wall-clock duration of the install exec, captured around the
+    // single `sandbox.exec(installCmd, ...)` call. Null only for paths
+    // that never reach this orchestrator (the DO never runs); inside
+    // runScore the install always runs before a success, so this is
+    // always populated on the ok-true branch. Threaded through the
+    // DO success envelope so handler.ts can populate the AE
+    // `install ms` slot without a second timing surface.
+    install_ms: number;
+    // Wall-clock duration of the anc check exec. Same shape +
+    // rationale as install_ms; populated on the ok-true branch.
+    anc_check_ms: number;
+  };
+};
+
+export type ScoreFailure = {
+  ok: false;
+  error: ScoreErrorCode;
+  details?: string;
+};
+
+export type ScoreResult = ScoreSuccess | ScoreFailure;
+
+export type ScoreErrorCode =
+  // Install path classes (gate F4 — three distinct error tags).
+  | 'install_unsupported' // brew on Alpine; bounce at the install table.
+  | 'chain_resolved_install_failed' // install command returned non-zero.
+  | 'chain_resolved_no_binary_produced' // install succeeded but `which <binary>` missed.
+  // Exec failure classes.
+  | 'anc_version_unreadable' // anc --version returned no parseable version.
+  | 'anc_check_failed' // anc check returned non-zero AND no parseable JSON envelope.
+  // Wall-clock.
+  | 'timeout';
+
+// Marker tokens the direct-install command emits so the orchestrator can
+// classify shell-subshell failures into specific error details (Fix 3 —
+// gate-capture). Each `GATE:<name>` line is written to stderr BEFORE the
+// shell step it labels runs; on subshell failure the LAST GATE marker
+// names the step that failed. `DETAILS:<text>` carries a step-specific
+// message into the orchestration. `DETECTED_BINARY=<name>` is emitted to
+// stdout on success so auto-detect (Fix 1) can rename spec.binary to the
+// archive's actual executable rather than guessing it's the repo name.
+const GATE_PREFIX = 'GATE:';
+const DETAILS_PREFIX = 'DETAILS:';
+const DETECTED_BINARY_PREFIX = 'DETECTED_BINARY=';
+
+// ---------------------------------------------------------------------------
+// Public surface
+// ---------------------------------------------------------------------------
+
+// Capability surface this module needs from the DO instance. Typed as a
+// structural subset of the real Sandbox class so tests can pass a plain
+// object with these two methods and the call-order invariant is
+// observable from outside the class.
+export type ContainerLike = {
+  setOutboundHandler<P = unknown>(name: string, params?: P): Promise<void>;
+  exec(command: string, options?: { timeout?: number }): Promise<ExecLike>;
+};
+
+export type ExecLike = {
+  success: boolean;
+  stdout: string;
+  stderr: string;
+  exitCode?: number;
+};
+
+// Quick static-shape sanity check: the real Sandbox class implements
+// the ContainerLike surface (the assignability check fires at compile
+// time if SDK drift removes either method).
+type _ContainerLikeShapeCheck = Sandbox extends ContainerLike ? true : never;
+const _shapeCheck: _ContainerLikeShapeCheck = true;
+void _shapeCheck;
+
+const TOTAL_TIMEOUT_MS = 60_000; // R7 — install + score combined.
+const SHORT_EXEC_TIMEOUT_MS = 5_000; // `which`, `anc --version`.
+
+// Per-PM install-host allowlists. Only these hosts are reachable during
+// Phase 1 install for each PM; Phase 2 (anc check) blocks all hosts.
+// Tightening or relaxing this map changes the security baseline — pair
+// any update with a refresh of the script-execution audit row.
+//
+// GitHub release downloads (cargo-binstall, go install with GitHub-hosted
+// modules, direct binary URLs) hit api.github.com for release metadata,
+// then github.com for the download URL, which 302-redirects to one of
+// several CDN hosts under `*.githubusercontent.com`
+// (`objects.githubusercontent.com`, `release-assets.githubusercontent.com`,
+// `codeload.githubusercontent.com`, `raw.githubusercontent.com`, etc.).
+// The list shifts over time — GitHub moved release assets from
+// `objects.` to `release-assets.` mid-2024 and may shift again. The
+// wildcard `*.githubusercontent.com` entry (matched by the
+// hostnameAllowed helper in do.ts) covers the moving CDN target so we
+// don't keep playing whack-a-mole as GitHub rotates infrastructure.
+// api.github.com queries are subject to the anonymous rate limit
+// (60/hr/IP, pooled across CF egress IPs) — separate runtime risk.
+const GITHUB_RELEASE_HOSTS = [
+  'api.github.com',
+  'github.com',
+  'codeload.github.com',
+  '*.githubusercontent.com',
+] as const;
+
+const INSTALL_HOSTS: Record<string, readonly string[]> = {
+  // `index.crates.io` is the sparse-index host (default in cargo
+  // 1.70+); cargo-binstall hits it for `config.json` before any crate
+  // download. Older `crates.io` redirects there, but the sparse index
+  // is the direct path. Without it, cargo-binstall fails with
+  // `403 Forbidden for url (https://index.crates.io/config.json)`.
+  'cargo-binstall': ['crates.io', 'static.crates.io', 'index.crates.io', ...GITHUB_RELEASE_HOSTS],
+  pip: ['pypi.org', 'files.pythonhosted.org'],
+  // uv hits the same wheel-hosting hosts as pip — pypi.org for metadata
+  // and files.pythonhosted.org for wheel downloads — but via a
+  // different client + resolver path that we hope sidesteps Bug M
+  // (pip metadata 403 via CF fetch passthrough).
+  uv: ['pypi.org', 'files.pythonhosted.org'],
+  npm: ['registry.npmjs.org'],
+  // bun's `add -g` resolves from npm — `registry.npmjs.org` is the
+  // only host the install path needs.
+  bun: ['registry.npmjs.org'],
+  go: ['proxy.golang.org', 'sum.golang.org', ...GITHUB_RELEASE_HOSTS],
+} as const;
+
+// ---------------------------------------------------------------------------
+// Orchestration
+// ---------------------------------------------------------------------------
+
+export async function score(sandbox: ContainerLike, spec: InstallSpec): Promise<ScoreResult> {
+  return await Promise.race([runScore(sandbox, spec), timeoutAfter(TOTAL_TIMEOUT_MS)]);
+}
+
+async function runScore(sandbox: ContainerLike, spec: InstallSpec): Promise<ScoreResult> {
+  const installCmd = installCommandFor(spec);
+  if (!installCmd) {
+    return { ok: false, error: 'install_unsupported', details: `pm=${spec.pm}` };
+  }
+  const hosts = installHostsFor(spec);
+  let binary = spec.binary;
+
+  // Phase 1 — allow install hosts. Setting the handler BEFORE exec is the
+  // safety invariant covered by tests/score-do.test.ts scenario (b).
+  await sandbox.setOutboundHandler<{ allowedHostnames: string[] }>('allowedInstall', {
+    allowedHostnames: [...hosts],
+  });
+
+  const installStart = Date.now();
+  const installResult = await sandbox.exec(installCmd, { timeout: TOTAL_TIMEOUT_MS });
+  const installMs = Date.now() - installStart;
+  if (!installResult.success) {
+    // Gate-capture (Fix 3): direct-install commands emit `GATE:<step>` markers
+    // to stderr before each step. The LAST marker names the step that
+    // tripped `set -e`. If a step also emitted `DETAILS:<text>` (e.g. the
+    // archive listing for the no-binary-candidate case), thread that into
+    // the user-facing details field instead of the raw stderr tail.
+    const gateDetails = extractGateDetails(installResult.stderr);
+    // Path-traversal short-circuit: archive contained a candidate the
+    // validator rejected. Bounce as no-binary-produced (an "archive
+    // shipped a malformed path" case), not install_failed.
+    if (gateDetails?.kind === 'no_binary_candidates') {
+      return {
+        ok: false,
+        error: 'chain_resolved_no_binary_produced',
+        details: gateDetails.details,
+      };
+    }
+    return {
+      ok: false,
+      error: 'chain_resolved_install_failed',
+      details: gateDetails?.details ?? (truncate(installResult.stderr) || truncate(installResult.stdout)),
+    };
+  }
+
+  // Auto-detect (Fix 1): direct-install commands print
+  // `DETECTED_BINARY=<name>` on stdout when the archive carried a binary
+  // whose filename differs from spec.binary (the gogcli → gog case).
+  // Override spec.binary so the downstream `which` gate + `anc check
+  // --command <binary>` invocation targets the file that actually got
+  // installed. The detected name is the basename, character-validated
+  // by the install command's filter before it lands here.
+  const detected = extractDetectedBinary(installResult.stdout);
+  if (detected) {
+    binary = detected;
+  }
+
+  // Git-clone source-scoped path: no binary on PATH to verify — `anc
+  // check <path>` runs against the cloned source. Skip the `which
+  // <binary>` gate, which would always miss because the repo name is
+  // not necessarily a CLI binary the clone produced.
+  const isSourceScoped = spec.pm === 'git-clone';
+
+  if (!isSourceScoped) {
+    // Verify the install produced a runnable binary on PATH. Catches the
+    // pallets/click case (wheel installs cleanly, no console_scripts entry).
+    const whichCmd = `which ${shellQuote(binary)}`;
+    const whichResult = await sandbox.exec(whichCmd, { timeout: SHORT_EXEC_TIMEOUT_MS });
+    if (!whichResult.success || !whichResult.stdout.trim()) {
+      return { ok: false, error: 'chain_resolved_no_binary_produced', details: `binary=${binary}` };
+    }
+  }
+
+  // Phase 2 — lock down. `anc check` must not reach any host. Setting the
+  // handler BEFORE exec is the second safety invariant covered by test
+  // scenario (b).
+  await sandbox.setOutboundHandler('noHttp');
+
+  // Capture anc_version live from the running binary, never a build-time
+  // constant — a cached scorecard must record the anc that actually
+  // produced it so re-deployed sites don't lie about provenance.
+  const versionResult = await sandbox.exec('anc --version', { timeout: SHORT_EXEC_TIMEOUT_MS });
+  if (!versionResult.success) {
+    return { ok: false, error: 'anc_version_unreadable' };
+  }
+  const ancVersion = parseAncVersion(versionResult.stdout);
+  if (!ancVersion) {
+    return {
+      ok: false,
+      error: 'anc_version_unreadable',
+      details: truncate(versionResult.stdout, 120),
+    };
+  }
+
+  // Run anc check. Two invocation shapes:
+  //   - binary install (default): `anc check --command <binary>` scores
+  //     the running binary's behavior against the spec.
+  //   - source clone (git-clone PM, branch-scoped paste): `anc check
+  //     <clone-path>` scores the source layout + project files. The
+  //     clone-path is interpolated via shellQuote and the path itself
+  //     is built from the spec, NOT from user input — the user's input
+  //     only flows in through the validated owner/repo/branch slots
+  //     which are character-class-restricted at validate.ts.
+  const auditProfile = (spec as { audit_profile?: string }).audit_profile;
+  const ancCheckCmd = isSourceScoped
+    ? buildAncCheckSourceCmd(spec as GitCloneInstall, auditProfile)
+    : auditProfile
+      ? `anc check --command ${shellQuote(binary)} --output json --audit-profile ${shellQuote(auditProfile)}`
+      : `anc check --command ${shellQuote(binary)} --output json`;
+  const ancCheckStart = Date.now();
+  const checkResult = await sandbox.exec(ancCheckCmd, { timeout: TOTAL_TIMEOUT_MS });
+  const ancCheckMs = Date.now() - ancCheckStart;
+
+  // anc emits a structured envelope on stdout even on non-zero exit when
+  // a check produced findings. Try to parse before declaring failure.
+  let scorecard: unknown;
+  try {
+    scorecard = JSON.parse(checkResult.stdout);
+  } catch {
+    if (!checkResult.success) {
+      return {
+        ok: false,
+        error: 'anc_check_failed',
+        details: truncate(checkResult.stderr) || truncate(checkResult.stdout),
+      };
+    }
+    return { ok: false, error: 'anc_check_failed', details: 'anc returned non-JSON stdout' };
+  }
+
+  return {
+    ok: true,
+    value: { scorecard, anc_version: ancVersion, install_ms: installMs, anc_check_ms: ancCheckMs },
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Install table
+// ---------------------------------------------------------------------------
+
+function installCommandFor(spec: InstallSpec): string | null {
+  switch (spec.pm) {
+    case 'brew':
+      // brew returns null so resolveSpec() in resolve-spec.ts (Worker
+      // tier post-2026-05-20) can apply the discovery-fallback before
+      // this table is consulted. By the time
+      // a request reaches installCommandFor() with pm=brew, the
+      // fallback has already missed — i.e. no alternative PM exists
+      // for the formula. score() catches the null and bounces as
+      // install_unsupported with pm=brew_only (mapped through
+      // resolveSpec, not here, so the user-facing detail surfaces the
+      // brew_only case rather than the legacy pm=brew message).
+      return null;
+    case 'bun':
+      // Native bun runtime ships in the image (2026-05-18 rework).
+      // --ignore-scripts suppresses npm-style lifecycle hooks since
+      // bun resolves from the npm registry and runs the same script
+      // lifecycle as npm. --no-summary cuts noise from the install
+      // output that would otherwise pollute the truncated details
+      // field on failure.
+      return `bun add -g --ignore-scripts ${shellQuote(spec.package)}`;
+    case 'uv':
+      // Native uv (2026-05-18 rework — split from pm=pip). uv tool
+      // install places the binary at $UV_TOOL_BIN_DIR (default
+      // $HOME/.local/bin, covered by Dockerfile PATH). uv's resolver
+      // sidesteps the pip 24+ PEP 658 metadata fast-path that 403s
+      // through CF fetch passthrough for some packages (Bug M).
+      return `uv tool install ${shellQuote(spec.package)}`;
+    case 'cargo-binstall':
+      // Standalone `cargo-binstall` binary lives at /usr/local/bin/
+      // (Dockerfile lines 73-80). The image ships NO rust toolchain per
+      // Premise #2 ("no compilers, no toolchains"), so the `cargo` CLI
+      // does not exist — calling `cargo binstall <pkg>` would fail with
+      // `cargo: command not found`. The binstall README documents the
+      // standalone use case.
+      //
+      // --install-path /usr/local/bin overrides cargo-binstall's default
+      // of $CARGO_HOME/bin (= ~/.cargo/bin), which isn't on our PATH.
+      // Without it, the binary installs successfully but the post-install
+      // `which <binary>` gate misses and the request bounces as
+      // chain_resolved_no_binary_produced.
+      return `cargo-binstall --no-confirm --no-symlinks --install-path /usr/local/bin ${shellQuote(spec.package)}`;
+    case 'pip':
+      // --only-binary=:all: refuses sdist execution (the setup.py
+      // arbitrary-code-exec class). --no-cache-dir keeps the container
+      // filesystem clean across requests on a warm DO. PIP_NO_COLOR=1
+      // suppresses ANSI escape sequences in pip's progress output that
+      // pollute the orchestration's error `details` field when an
+      // install fails. --break-system-packages overrides PEP 668's
+      // "externally-managed-environment" refusal that Debian's
+      // python3-pip ships with — kept for safety even though the
+      // python:3.12-slim-trixie base (2026-05-19) does NOT carry the
+      // EXTERNALLY-MANAGED marker, so the flag is a no-op there.
+      //
+      // 2026-05-18: dropped `--use-deprecated=legacy-resolver` (Bug M
+      // workaround on Alpine/musllinux). The Debian-slim rework moves
+      // pip onto manylinux wheels which we believe closes the metadata
+      // 403 gap; staging retest of `pip install httpie` validates.
+      // Re-add this flag in a follow-up if httpie regresses.
+      //
+      // 2026-05-19: `--no-binary=<name1,name2,...>` selectively allows
+      // sdist install for specific trusted packages (sdist-allowlist.ts).
+      // Each entry has a vetted maintainer + upstream issue trail;
+      // adding to the list is a deliberate security loosening for that
+      // ONE package, the rest of the dep graph stays wheel-only.
+      // Empty allowlist → no --no-binary flag.
+      //
+      // 2026-05-19: `PIP_UPLOADED_PRIOR_TO=<date>` enforces a 7-day
+      // package-release delay so a fresh-publish supply-chain attack
+      // has at minimum a 7-day detection window before our sandbox
+      // would install it. The date is computed at exec time via shell
+      // substitution so image age doesn't widen the gate; uv's
+      // equivalent (UV_EXCLUDE_NEWER) is baked as an image ENV because
+      // uv accepts relative durations natively. pip support is v26.0+;
+      // older pip versions ignore the env var (no-op until upstream
+      // lands, then the gate auto-activates on image rebuild).
+      // `PIP_DISABLE_PIP_VERSION_CHECK=1` suppresses the "A new release
+      // of pip is available" stderr notice. It's also baked as an image
+      // ENV in docker/sandbox/Dockerfile so future builds carry it
+      // intrinsically; the inline pass here keeps the
+      // currently-deployed image quiet until the next rebuild lands.
+      return (
+        `PIP_UPLOADED_PRIOR_TO=$(date -u -d '7 days ago' +%Y-%m-%dT%H:%M:%SZ) ` +
+        `PIP_DISABLE_PIP_VERSION_CHECK=1 ` +
+        `PIP_NO_COLOR=1 pip install --only-binary=:all:` +
+        (SDIST_TRUSTED_NAMES ? ` --no-binary=${SDIST_TRUSTED_NAMES}` : '') +
+        ` --no-cache-dir --break-system-packages ${shellQuote(spec.package)}`
+      );
+    case 'npm':
+      // --ignore-scripts suppresses preinstall/install/postinstall
+      // lifecycle hooks — keeps Phase 1 egress from being abused by
+      // lifecycle scripts before the Phase 2 lockdown fires.
+      return `npm install -g --ignore-scripts ${shellQuote(spec.package)}`;
+    case 'go':
+      // pm=go bounces here so resolveSpec()'s go discovery-fallback
+      // (resolve-spec.ts:resolveGoFallback) translates `go install <module>`
+      // inputs upstream of this layer. If a request reaches
+      // installCommandFor() with pm=go the fallback has already
+      // missed, which means the module isn't on github.com OR the
+      // repo has no GitHub release binary — both flagged as
+      // install_unsupported pm=go_no_binary by resolveSpec. The null
+      // here is a safety net; sandbox-exec wouldn't otherwise know
+      // whether to compile (we ship no toolchain) or bounce.
+      return null;
+    case 'git-clone':
+      // Branch-scoped source clone. The branch name was validated at
+      // validate.ts (BRANCH_NAME_RE + explicit
+      // `..` reject) AND re-validated at the DO boundary (do.ts
+      // resolveSpec in resolve-spec.ts). buildGitCloneCommand() refuses to emit a command
+      // for a branch that fails the validBranchName check — defense
+      // in depth so a future caller that builds an InstallSpec
+      // directly (skipping validate.ts AND resolveSpec) still can't
+      // smuggle shell metacharacters through. Returns null when the
+      // branch fails late-stage validation, which collapses to
+      // install_unsupported with pm=git-clone.
+      return buildGitCloneCommand(spec);
+    case 'direct':
+      // Archive download + extract to /usr/local/bin. The user-pasted
+      // URL is the trust boundary; SHA verification is not done at
+      // this layer (no known-good SHA available for arbitrary user
+      // input). -L follows redirects so github.com release URLs that
+      // 302 to objects.githubusercontent.com resolve correctly (the
+      // allowlist expansion in installHostsFor covers the CDN host).
+      //
+      // 2026-05-18 (Bug N): dispatch extraction on URL extension. The
+      // legacy single-form `tar xz` worked for .tar.gz/.tgz only;
+      // many newer Rust tools (csvlens, etc.) ship .tar.xz exclusively
+      // for compression, plus .zip / .tar.bz2 appear in the wild.
+      // .tar.gz / .tgz   → tar xz
+      // .tar.xz / .txz   → tar xJ  (requires xz-utils in image)
+      // .tar.bz2 / .tbz2 → tar xj  (requires bzip2 in image)
+      // .zip             → unzip into a tmp dir, install matched binary
+      // Anything else    → falls through to tar xz (preserves legacy
+      //                    behavior, will fail loud on unsupported
+      //                    formats so the bounce is visible).
+      return directInstallCommand(spec.url, spec.binary);
+    default: {
+      // Exhaustiveness check — adding a new PM to the InstallSpec union
+      // is a compile error here until the table is updated.
+      const _exhaustive: never = spec;
+      void _exhaustive;
+      return null;
+    }
+  }
+}
+
+// Dispatch the direct-PM install command on archive extension. Kept
+// alongside installCommandFor() (vs. inlined) so the per-extension
+// shapes are individually testable and the test file pins each form.
+//
+// All formats extract into a per-invocation tmp dir, then `find` an
+// executable and `install` it to /usr/local/bin. The earlier streaming
+// `tar -C /usr/local/bin/` shape failed for archives whose binary was
+// nested inside a top-level directory (csvlens ships
+// `csvlens-x86_64-unknown-linux-musl/csvlens`); the recursive find+install
+// shape handles both flat and nested layouts.
+//
+// Auto-detect (Fix 1): instead of `find -name <expectedBinary>` (which
+// fails the moment an archive ships a binary whose filename doesn't
+// match the GitHub repo name — the gogcli/openclaw case, repo=gogcli but
+// archive contains `gog`), the install command lists ALL executables in
+// the archive, filters out known documentation/manifest filenames, and
+// picks the best candidate via a scoring tiebreaker (exact-match to the
+// preferred name wins; otherwise repo-name substring; otherwise shortest
+// name; ties broken by lexicographic order for determinism). The chosen
+// file is installed under its OWN basename, and that basename is echoed
+// to stdout as `DETECTED_BINARY=<name>` so runScore() can override
+// spec.binary before the `which <binary>` gate + `anc check --command
+// <binary>` invocation run.
+//
+// Gate-capture (Fix 3): each pipeline step echoes `GATE:<name>` to
+// stderr BEFORE running. On `set -e` failure the final GATE marker
+// names the step that tripped, and a step-specific `DETAILS:<text>`
+// stderr line carries enough context to user-render an honest bounce
+// (archive listing for no-candidates case, etc.).
+//
+// Path-traversal: the candidate filter rejects any path segment
+// containing `..` or starting with `/` (absolute paths from a maliciously
+// crafted archive). Tar's own `--no-same-owner --no-same-permissions`
+// flags + the per-request mktemp prefix close the rest of the attack
+// surface; the candidate-name regex is defense in depth.
+function directInstallCommand(url: string, preferredBinary: string): string {
+  const lower = url.toLowerCase();
+  const qUrl = shellQuote(url);
+  // The preferred-binary hint passes through awk as a variable; awk
+  // single-quote escapes are handled by the surrounding shell quote.
+  const qPreferred = shellQuote(preferredBinary);
+  let extractCmd: string;
+  if (lower.endsWith('.tar.gz') || lower.endsWith('.tgz')) {
+    extractCmd = `tar xzf "$tmp/a" -C "$tmp/x"`;
+  } else if (lower.endsWith('.tar.xz') || lower.endsWith('.txz')) {
+    extractCmd = `tar xJf "$tmp/a" -C "$tmp/x"`;
+  } else if (lower.endsWith('.tar.bz2') || lower.endsWith('.tbz2')) {
+    extractCmd = `tar xjf "$tmp/a" -C "$tmp/x"`;
+  } else if (lower.endsWith('.zip')) {
+    extractCmd = `unzip -q "$tmp/a" -d "$tmp/x"`;
+  } else {
+    // Unknown extension: attempt gzip-tar as a last resort. Fails loud
+    // on mismatch; orchestration bounces as chain_resolved_install_failed.
+    extractCmd = `tar xzf "$tmp/a" -C "$tmp/x"`;
+  }
+  // Wrapped in `( ... )` subshell so `set -e` exits the subshell on
+  // failure rather than the persistent container shell session (which
+  // would kill the session and 1101-error every subsequent request
+  // routed to this DO instance — SessionTerminatedError).
+  //
+  // The candidate-listing pipeline:
+  //   1. find executables (perm /111) under the extraction root, print
+  //      paths relative to it (-printf '%P\n').
+  //   2. grep -v out doc/manifest filenames (LICENSE, README, etc.) and
+  //      known non-binary extensions (.md, .txt, .html, .json, ...).
+  //   3. grep -v out any path containing `..` (path-traversal guard) or
+  //      starting with `/` (absolute path from malicious archive).
+  // awk scores each candidate by name match + shortness; tie-broken by
+  // lex order. The exit-code-11 path emits a DETAILS:<list> line so the
+  // user-facing bounce can name the files it saw.
+  return (
+    `( set -e; ` +
+    `tmp=$(mktemp -d); ` +
+    `mkdir "$tmp/x"; ` +
+    `echo '${GATE_PREFIX}download' >&2; ` +
+    `curl -fsSL ${qUrl} -o "$tmp/a" 2>"$tmp/curl_err" || ` +
+    `{ echo "${DETAILS_PREFIX}Download failed: $(cat "$tmp/curl_err" | head -c 200)" >&2; exit 10; }; ` +
+    `echo '${GATE_PREFIX}extract' >&2; ` +
+    `${extractCmd} 2>"$tmp/ext_err" || ` +
+    `{ echo "${DETAILS_PREFIX}Extract failed: $(cat "$tmp/ext_err" | head -c 200)" >&2; exit 12; }; ` +
+    `echo '${GATE_PREFIX}find_binary' >&2; ` +
+    `candidates=$(find "$tmp/x" -type f -perm /111 -printf '%P\\n' 2>/dev/null | ` +
+    `grep -viE '(^|/)(LICEN[CS]E|README|CHANGELOG|NOTICE|AUTHORS|COPYING|MANIFEST|Makefile|\\.gitignore)([._-].*)?$' | ` +
+    `grep -viE '\\.(md|markdown|txt|html|htm|json|yml|yaml|toml|xml|cfg|ini|sh|bat|cmd|py|rb|pl)$' | ` +
+    `grep -vE '(^|/)\\.\\.(/|$)' | ` +
+    `grep -vE '^/' || true); ` +
+    `if [ -z "$candidates" ]; then ` +
+    `all=$(find "$tmp/x" -type f -printf '%P\\n' 2>/dev/null | head -10 | tr '\\n' ' '); ` +
+    `echo "${DETAILS_PREFIX}Archive contains no binary named ${preferredBinary}. Files seen: $all" >&2; ` +
+    `exit 11; ` +
+    `fi; ` +
+    `best=$(printf '%s\\n' "$candidates" | awk -v pref=${qPreferred} '` +
+    `{ ` +
+    `n=split($0, parts, "/"); name=parts[n]; ` +
+    `score=0; ` +
+    `if (name == pref) score=1000; ` +
+    `else if (index(name, pref) > 0) score=500; ` +
+    `if (name !~ /\\./) score+=10; ` +
+    `score -= length(name); ` +
+    `if (score > best_score || best == "") { best_score=score; best=$0 } ` +
+    `} END { print best }'); ` +
+    `detected=$(basename "$best"); ` +
+    `echo '${GATE_PREFIX}install_binary' >&2; ` +
+    `install -m 0755 "$tmp/x/$best" "/usr/local/bin/$detected" 2>"$tmp/inst_err" || ` +
+    `{ echo "${DETAILS_PREFIX}Install staging failed: $(cat "$tmp/inst_err" | head -c 200)" >&2; exit 13; }; ` +
+    `rm -rf "$tmp"; ` +
+    `echo "${DETECTED_BINARY_PREFIX}$detected" )`
+  );
+}
+
+function installHostsFor(spec: InstallSpec): readonly string[] {
+  if (spec.pm === 'git-clone') {
+    // git clone over https hits github.com directly; for some repos the
+    // server-side may 302 to codeload.github.com for the pack file. Both
+    // are in the GITHUB_RELEASE_HOSTS set already, plus the
+    // `*.githubusercontent.com` wildcard covers any future redirect target.
+    return GITHUB_RELEASE_HOSTS;
+  }
+  if (spec.pm === 'direct') {
+    try {
+      const host = new URL(spec.url).hostname;
+      // GitHub release download URLs (`github.com/.../releases/download/...`)
+      // HTTP 302 redirect to `objects.githubusercontent.com`, sometimes
+      // via `codeload.github.com` for source archives. Allow all three
+      // together so `curl -fsSL` can follow the redirect chain to the
+      // actual asset without the allowlist handler 403-ing the redirect
+      // target. Other hosts (e.g. a direct CDN URL) get only the
+      // declared hostname.
+      if (host === 'github.com' || GITHUB_RELEASE_HOSTS.includes(host as (typeof GITHUB_RELEASE_HOSTS)[number])) {
+        return GITHUB_RELEASE_HOSTS;
+      }
+      return [host];
+    } catch {
+      return [];
+    }
+  }
+  return INSTALL_HOSTS[spec.pm] ?? [];
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+// POSIX single-quote shell escape — wraps in `'...'` and replaces internal
+// `'` with `'\''`. Safe for arbitrary user-pasted package names and URLs.
+function shellQuote(s: string): string {
+  return `'${s.replaceAll("'", "'\\''")}'`;
+}
+
+function parseAncVersion(stdout: string): string | null {
+  // Expected forms: `anc 0.3.1`, `anc version 0.3.1`, `anc 0.3.1 (commit
+  // <sha>)`. The semver match is the load-bearing part.
+  const match = stdout.match(/(\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?)/);
+  return match ? match[1] : null;
+}
+
+// CSI (Control Sequence Introducer) escape sequences emitted by terminal-
+// aware tools (pip progress bars, npm spinners) pollute the details
+// field that surfaces back to the user. Strip before truncation so the
+// truncated tail isn't a mangled partial escape sequence. The ESC
+// (\x1b) byte is the load-bearing prefix of every ANSI CSI sequence —
+// matching it literally is the point of this pattern, so the biome
+// noControlCharactersInRegex lint is deliberately suppressed here.
+// biome-ignore lint/suspicious/noControlCharactersInRegex: ESC is the CSI prefix; matching it is intentional
+const ANSI_CSI_RE = /\x1b\[[0-?]*[ -/]*[@-~]/g;
+
+function truncate(s: string | undefined, n = 500): string {
+  if (!s) return '';
+  const clean = s.replace(ANSI_CSI_RE, '');
+  return clean.length > n ? `${clean.slice(0, n)}…` : clean;
+}
+
+function timeoutAfter(ms: number): Promise<ScoreFailure> {
+  return new Promise((resolve) => {
+    setTimeout(() => resolve({ ok: false, error: 'timeout' }), ms);
+  });
+}
+
+// Parse the DETECTED_BINARY=<name> line emitted by directInstallCommand.
+// Returns the basename of the file the install path actually staged at
+// /usr/local/bin, or null if the marker is absent (any non-direct PM
+// install command, or a future direct-install variant that doesn't
+// emit the marker). The name is filename-character-restricted before
+// it lands back in spec.binary, since downstream it shell-quotes the
+// value rather than re-validating its shape.
+export function extractDetectedBinary(stdout: string): string | null {
+  const lines = stdout.split(/\r?\n/);
+  for (let i = lines.length - 1; i >= 0; i--) {
+    const line = lines[i].trim();
+    if (line.startsWith(DETECTED_BINARY_PREFIX)) {
+      const name = line.slice(DETECTED_BINARY_PREFIX.length).trim();
+      // Whitelist filename characters — the install command's own filter
+      // rejects path-traversal upstream, but defense in depth keeps any
+      // smuggled bytes out of the shell-quoted `anc check --command` slot.
+      if (/^[A-Za-z0-9._-]+$/.test(name) && name.length > 0 && name.length <= 64) {
+        return name;
+      }
+    }
+  }
+  return null;
+}
+
+// Parse GATE:<step> + DETAILS:<text> markers from the install stderr.
+// Returns the highest-fidelity error description we can produce for the
+// user, plus a `kind` discriminator so runScore() can re-classify the
+// no-binary-candidate case (an "archive shipped only docs" miss, not an
+// "install command failed" miss). Returns null when no markers were
+// emitted, so the caller falls back to the raw truncated stderr.
+type GateDetails = {
+  kind: 'download' | 'extract' | 'no_binary_candidates' | 'install_staging' | 'unknown';
+  details: string;
+};
+export function extractGateDetails(stderr: string): GateDetails | null {
+  if (!stderr) return null;
+  const lines = stderr.split(/\r?\n/);
+  let lastGate: string | null = null;
+  let detailsLine: string | null = null;
+  for (const raw of lines) {
+    const line = raw.trim();
+    if (line.startsWith(GATE_PREFIX)) {
+      lastGate = line.slice(GATE_PREFIX.length).trim();
+    } else if (line.startsWith(DETAILS_PREFIX)) {
+      detailsLine = line.slice(DETAILS_PREFIX.length).trim();
+    }
+  }
+  if (!lastGate && !detailsLine) return null;
+  const kind: GateDetails['kind'] = detailsLine?.startsWith('Archive contains no binary named')
+    ? 'no_binary_candidates'
+    : detailsLine?.startsWith('Download failed')
+      ? 'download'
+      : detailsLine?.startsWith('Extract failed')
+        ? 'extract'
+        : detailsLine?.startsWith('Install staging failed')
+          ? 'install_staging'
+          : 'unknown';
+  const details = detailsLine ?? (lastGate ? `Install step '${lastGate}' failed` : 'Install failed');
+  return { kind, details: truncate(details) };
+}
+
+// ---------------------------------------------------------------------------
+// Git clone install path — branch-scoped scoring
+// ---------------------------------------------------------------------------
+
+// Build the git-clone install command for a branch-scoped paste.
+//
+// Security shape:
+//
+//   - owner + repo come from validate.ts. Owner matches GitHub's own
+//     username rules (alphanumeric + hyphen, no leading hyphen);
+//     repo matches `[A-Za-z0-9._-]+`. Neither character class includes
+//     shell metacharacters.
+//   - branch is double-validated: validate.ts at the Worker boundary
+//     AND resolve-spec.ts at the Worker boundary (resolveSpec).
+//     buildGitCloneCommand
+//     does a THIRD check via validBranchName() before string
+//     interpolation as a final defense — if a future code path
+//     constructs an InstallSpec directly (bypassing both upstream
+//     guards), this layer still refuses unsafe branch values.
+//   - Even with all that, every interpolated value flows through
+//     shellQuote(), which POSIX-single-quote-escapes the value. That's
+//     the load-bearing safety property: a single-quote-wrapped value
+//     with internal `'` rewritten to `'\''` cannot escape the quoted
+//     context regardless of regex coverage.
+//
+// The Sandbox SDK exposes exec(command: string) only — no argv array
+// form — so shellQuote IS the trust boundary at exec time. The strict
+// regex layers above shrink the attack surface; shellQuote closes it.
+//
+// Why `--depth 1 --no-tags --single-branch`: minimize bandwidth + time.
+// A branch-scoped score doesn't need full history or sibling refs;
+// the clone runs inside the 60 s combined install + score budget and
+// every saved second helps the worst-case latency.
+export function buildGitCloneCommand(spec: GitCloneInstall): string | null {
+  if (!validBranchName(spec.branch)) return null;
+  // owner + repo shape is enforced by validate.ts and re-enforced at
+  // the DO layer (validBranchName covers branch; the owner/repo character
+  // classes are enforced before this layer is reached). shellQuote
+  // remains the runtime closer.
+  const repoUrl = `https://github.com/${spec.owner}/${spec.repo}.git`;
+  // `( set -e; ... )` subshell so a failure mid-clone exits the
+  // subshell rather than killing the container's persistent shell
+  // session. `rm -rf` of the destination first handles re-runs on a
+  // warm DO instance (the prior request's clone would otherwise
+  // collide).
+  return (
+    `( set -e; rm -rf ${shellQuote(CLONE_DEST)}; ` +
+    `git clone --depth 1 --no-tags --single-branch ` +
+    `--branch ${shellQuote(spec.branch)} ` +
+    `${shellQuote(repoUrl)} ${shellQuote(CLONE_DEST)} )`
+  );
+}
+
+// Build the `anc check <path>` invocation for a source-scoped score.
+// Mirrors the `--command <binary>` form's audit-profile handling.
+export function buildAncCheckSourceCmd(_spec: GitCloneInstall, auditProfile: string | undefined): string {
+  const path = shellQuote(CLONE_DEST);
+  return auditProfile
+    ? `anc check ${path} --output json --audit-profile ${shellQuote(auditProfile)}`
+    : `anc check ${path} --output json`;
+}
diff --git a/src/worker/score/sdist-allowlist.ts b/src/worker/score/sdist-allowlist.ts
new file mode 100644
index 0000000..17f5325
--- /dev/null
+++ b/src/worker/score/sdist-allowlist.ts
@@ -0,0 +1,150 @@
+// Python packages allowed to install from sdist inside the sandbox,
+// overriding the default `--only-binary=:all:` enforcement on the pip
+// install path.
+//
+// Plan U7 follow-up (option C from the install-path triage on 2026-05-19).
+//
+// Background
+// ----------
+// `sandbox-exec.ts:installCommandFor()` runs `pip install
+// --only-binary=:all:` so installs MUST come from a precompiled wheel.
+// This removes the install-time arbitrary-code-exec surface (setup.py
+// runs during sdist builds) and was a hard-line security default from
+// plan U6's K-decision audit.
+//
+// The cost: any transitive dep that ships sdist-only on PyPI for the
+// current Python + linux_x86_64 fails the install. Pip's resolver
+// surfaces this as `ResolutionImpossible` after backing off through
+// many older versions, not as "no wheel for X". The error is opaque to
+// users.
+//
+// Specific blockers identified on 2026-05-19:
+//   - Aider-AI/aider#4105: `pyperclip==1.9.0` ships sdist-only.
+//   - Aider-AI/aider#4309: `numpy==1.24.3` triggers a build error path.
+//   - Aider-AI/aider#3037, #3660, #4340: combined evidence that aider's
+//     dep graph requires sdist for at least one path under
+//     `--only-binary=:all:`.
+//
+// Trust criteria for adding an entry
+// ----------------------------------
+// Each allowlisted package gets `--no-binary=<name>` on the pip install
+// command, which lets pip fall back to sdist (running setup.py) for
+// that specific package only. The rest of the dep graph stays
+// wheel-only. Adding a package to this list is a meaningful security
+// loosening for that one package, so every entry must satisfy:
+//
+//   1. Mature, well-known maintainer or PyPI org (no anonymous individual
+//      maintainers with low download counts).
+//   2. Clear reason this package can't always ship a wheel (legacy
+//      project, build-step at install, conditional native deps).
+//   3. Upstream issue link if a specific bug report drove the addition.
+//   4. Date added + commit/PR reference for the vetting trail.
+//
+// Removing an entry is always safe: the only consequence is the
+// previously-allowlisted package returns to `--only-binary` enforcement,
+// which may break tools that depend on it.
+//
+// How it's wired
+// --------------
+// `sandbox-exec.ts:installCommandFor()` joins `SDIST_TRUSTED_NAMES` into
+// the `--no-binary=<comma-list>` portion of the pip install command.
+// Empty list emits no `--no-binary` flag at all. uv installs already
+// fall back to sdist automatically (no equivalent flag needed); this
+// file targets the pip path specifically.
+
+export type SdistTrustedEntry = {
+  /** PyPI package name exactly as it appears in `--no-binary=<name>`. */
+  name: string;
+  /** Why this package needs sdist install (manylinux gap, legacy, etc.). */
+  reason: string;
+  /** Date added (YYYY-MM-DD) for chronological auditing. */
+  added: string;
+  /** Upstream issues, PRs, or maintainer docs that motivated the addition. */
+  evidence: readonly string[];
+  /**
+   * Lowest version where the sdist-only condition applies. Inclusive.
+   * Omit (or use `0.0.0`) when the condition applies to all known versions.
+   */
+  affected_min_version?: string;
+  /**
+   * Highest version where the sdist-only condition applies. Inclusive.
+   * Versions above this are expected to ship a wheel and won't need the
+   * allowlist entry; re-evaluate removal when the package's pinned
+   * version in aider-chat or other consumers crosses this threshold.
+   */
+  affected_max_version?: string;
+  /**
+   * Optional recommended pin a downstream consumer could use to avoid
+   * the sdist condition entirely. Documentary only — not enforced.
+   */
+  safe_pin?: string;
+};
+
+export type SdistRejectedEntry = {
+  /** PyPI package name. */
+  name: string;
+  /** Why allowlisting this package would NOT fix the underlying issue. */
+  reason: string;
+  /** Date investigated (YYYY-MM-DD). */
+  investigated: string;
+  /** Lowest version where the issue described in `reason` applies. */
+  affected_min_version?: string;
+  /** Highest version where the issue applies. Inclusive. */
+  affected_max_version?: string;
+  /**
+   * Optional pin recommendation that sidesteps the issue without
+   * touching `--only-binary`. The right fix for these rejected
+   * entries usually involves pinning, not allowlisting.
+   */
+  safe_pin?: string;
+};
+
+export const SDIST_TRUSTED_DEPS: readonly SdistTrustedEntry[] = [
+  {
+    name: 'pyperclip',
+    reason:
+      'Cross-platform clipboard utility. Pure Python (~300 lines) with no C compilation, no install-time network calls, no setup.py beyond a sys import. PyPI publishes sdist-only for 1.8.x and 1.9.0 (the versions aider-chat 0.83-0.86 pins); v1.11.0 finally ships a wheel. Maintained by Al Sweigart (well-known PyPI author, author of Automate the Boring Stuff with Python). No CVEs.',
+    added: '2026-05-19',
+    evidence: ['https://github.com/Aider-AI/aider/issues/4105', 'https://github.com/asweigart/pyperclip/issues/213'],
+    affected_min_version: '0.0.0',
+    affected_max_version: '1.10.0',
+    safe_pin: '>=1.11.0',
+  },
+  {
+    name: 'pycparser',
+    reason:
+      'Pure-Python C grammar parser, no wheel through v2.23 on PyPI (v3.0 published 2026-01-21 finally ships py3-none-any.whl). Maintained by Eli Bendersky (long-time PyPI author, also maintains pyelftools). Widely audited because cffi depends on it for OpenSSL bindings used across the cryptography ecosystem. No CVEs.',
+    added: '2026-05-19',
+    evidence: ['https://github.com/eliben/pycparser/issues/288', 'https://github.com/eliben/pycparser/issues/359'],
+    affected_min_version: '0.0.0',
+    affected_max_version: '2.23',
+    safe_pin: '>=3.0',
+  },
+];
+
+// Packages explicitly investigated and REJECTED for the allowlist. Kept
+// here so a future "should we add X?" question gets a quick "no, here's
+// why" rather than a re-investigation.
+export const SDIST_REJECTED_NOTES: readonly SdistRejectedEntry[] = [
+  {
+    name: 'numpy',
+    reason:
+      "numpy==1.24.3 (the version aider-chat pins via its playwright extra) predates cp312 wheel publication AND fails to build from sdist on Python 3.12 because the standard library dropped `distutils` in 3.12. Allowlisting wouldn't fix the install; a real fix needs numpy>=1.26 (which has cp312 wheels). Don't add.",
+    investigated: '2026-05-19',
+    affected_min_version: '0.0.0',
+    affected_max_version: '1.25.99',
+    safe_pin: '>=1.26.0',
+  },
+  {
+    name: 'cffi',
+    reason:
+      'cffi 2.0.0 wheels are tagged `manylinux_2_17_x86_64` only (not dual-tagged with `manylinux2014_x86_64`). Modern pip (>=22.3) understands PEP 600 tags and resolves the wheel correctly. cffi 1.17.1 has confirmed `cp312-manylinux2014_x86_64` wheels and is the safe pin. Allowlisting is not the right tool; pin cffi instead if needed.',
+    investigated: '2026-05-19',
+    affected_min_version: '2.0.0',
+    affected_max_version: '2.99.99',
+    safe_pin: '==1.17.1',
+  },
+];
+
+/** Comma-joined name list for the pip `--no-binary=<a,b,c>` flag. Empty string when no entries. */
+export const SDIST_TRUSTED_NAMES: string = SDIST_TRUSTED_DEPS.map((d) => d.name).join(',');
diff --git a/src/worker/score/session.ts b/src/worker/score/session.ts
new file mode 100644
index 0000000..8d3b2da
--- /dev/null
+++ b/src/worker/score/session.ts
@@ -0,0 +1,124 @@
+// Signed `__Host-anc-session` cookie — issue, parse, verify.
+//
+// Plan U5 (docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md
+// "Cost ceiling and abuse mitigation" step 2): after a Turnstile solve,
+// the Worker sets a signed session cookie. The cookie value identifies
+// the session for `SCORE_LIMITER` rekeying: the limiter key is
+// `<session-id>:<sha256(input)>` so same-tool requests within a session
+// don't burn rate-limit budget.
+//
+// Cookie format:
+//   __Host-anc-session=<sid>.<expEpochSec>.<sigBase64Url>
+//
+// where `sigBase64Url = HMAC-SHA256(sid + "." + expEpochSec)` using
+// `env.SESSION_HMAC_SECRET`. Constant-time signature comparison.
+//
+// `__Host-` prefix requires Secure, Path=/, no Domain. Combined with
+// HttpOnly + SameSite=Lax this is the strict-cookie shape per OWASP
+// session-management guidance.
+
+const COOKIE_NAME = '__Host-anc-session';
+const COOKIE_TTL_SEC = 60 * 60; // 1 h, per plan
+const SID_BYTES = 16;
+
+export type SessionEnv = {
+  SESSION_HMAC_SECRET?: string;
+};
+
+export type Session = { sid: string; expiresAt: number };
+
+export class SessionConfigError extends Error {
+  constructor() {
+    super('SESSION_HMAC_SECRET not configured');
+    this.name = 'SessionConfigError';
+  }
+}
+
+/** Generate a fresh session payload (no signature yet — see issue()). */
+export function newSession(nowMs: number = Date.now()): Session {
+  const bytes = new Uint8Array(SID_BYTES);
+  crypto.getRandomValues(bytes);
+  return {
+    sid: base64Url(bytes),
+    expiresAt: Math.floor(nowMs / 1000) + COOKIE_TTL_SEC,
+  };
+}
+
+/** Build the Set-Cookie header value for a fresh session. */
+export async function issue(env: SessionEnv, session: Session): Promise<string> {
+  const secret = requireSecret(env);
+  const payload = `${session.sid}.${session.expiresAt}`;
+  const sig = await sign(secret, payload);
+  const value = `${payload}.${sig}`;
+  return `${COOKIE_NAME}=${value}; HttpOnly; Secure; SameSite=Lax; Path=/; Max-Age=${COOKIE_TTL_SEC}`;
+}
+
+/**
+ * Parse + verify the session cookie from a request. Returns the session on
+ * success, `null` on missing/expired/tampered cookie. Constant-time signature
+ * comparison via Web Crypto.
+ */
+export async function read(env: SessionEnv, request: Request, nowMs: number = Date.now()): Promise<Session | null> {
+  const secret = requireSecret(env);
+  const cookieHeader = request.headers.get('cookie');
+  if (!cookieHeader) return null;
+
+  const raw = extractCookie(cookieHeader, COOKIE_NAME);
+  if (!raw) return null;
+
+  const parts = raw.split('.');
+  if (parts.length !== 3) return null;
+  const [sid, expStr, sig] = parts;
+
+  const exp = Number(expStr);
+  if (!Number.isFinite(exp) || exp <= Math.floor(nowMs / 1000)) return null;
+
+  const expected = await sign(secret, `${sid}.${expStr}`);
+  if (!constantTimeEquals(sig, expected)) return null;
+
+  return { sid, expiresAt: exp };
+}
+
+function requireSecret(env: SessionEnv): string {
+  if (!env.SESSION_HMAC_SECRET) throw new SessionConfigError();
+  return env.SESSION_HMAC_SECRET;
+}
+
+async function sign(secret: string, payload: string): Promise<string> {
+  const key = await crypto.subtle.importKey(
+    'raw',
+    new TextEncoder().encode(secret),
+    { name: 'HMAC', hash: 'SHA-256' },
+    false,
+    ['sign'],
+  );
+  const sig = await crypto.subtle.sign('HMAC', key, new TextEncoder().encode(payload));
+  return base64Url(new Uint8Array(sig));
+}
+
+function constantTimeEquals(a: string, b: string): boolean {
+  if (a.length !== b.length) return false;
+  let diff = 0;
+  for (let i = 0; i < a.length; i++) {
+    diff |= a.charCodeAt(i) ^ b.charCodeAt(i);
+  }
+  return diff === 0;
+}
+
+function extractCookie(header: string, name: string): string | null {
+  for (const part of header.split(';')) {
+    const trimmed = part.trim();
+    const eq = trimmed.indexOf('=');
+    if (eq < 0) continue;
+    if (trimmed.slice(0, eq) === name) return trimmed.slice(eq + 1);
+  }
+  return null;
+}
+
+function base64Url(bytes: Uint8Array): string {
+  let s = '';
+  for (const b of bytes) s += String.fromCharCode(b);
+  return btoa(s).replace(/\+/g, '-').replace(/\//g, '_').replace(/=+$/, '');
+}
+
+export const _internal = { COOKIE_NAME, COOKIE_TTL_SEC };
diff --git a/src/worker/score/summary-render.ts b/src/worker/score/summary-render.ts
new file mode 100644
index 0000000..2874d21
--- /dev/null
+++ b/src/worker/score/summary-render.ts
@@ -0,0 +1,442 @@
+// Server-side renderer for /score/live/<binary> + markdown twin.
+//
+// Reads the cached scorecard from R2 and emits either:
+//
+//   - HTML at /score/live/<binary> — top-3 issues + score badge + CTA,
+//     wrapped in the site shell (build-emitted template asset).
+//   - Markdown at /score/live/<binary>.md OR Accept: text/markdown — same
+//     content, plain markdown twin so agents pasting `Accept:
+//     text/markdown` get a clean document. Mirrors the site-wide
+//     "every HTML page has a markdown twin" invariant.
+//
+// Skips the full check table + per-tool metadata blocks the static
+// `/score/<tool>` page carries — this is a paste-and-share surface, not
+// a deep-dive page.
+//
+// Shell template comes from `dist/_internal/score-live-shell.html`,
+// emitted by `src/build/build.mjs` from the same `emitShell()` helper
+// that builds the static pages. Drift can't happen because the template
+// is regenerated on every build.
+
+import {
+  extractTopIssues,
+  formatCheckTableMarkdownLines,
+  groupToPrincipleNum,
+  escHtml as sharedEscHtml,
+} from '../../shared/scorecard-format.mjs';
+import { detectPreference } from '../accept';
+import { SITE_SPEC_VERSION, SPEC_VERSION } from '../spec-version.gen';
+import type { CacheEnv } from './cache';
+import { get as cacheGet, keyFor as cacheKeyFor } from './cache';
+
+// Lazy-cached shell template — fetched on the first /score/live request
+// in each isolate and held for the lifetime of the isolate. Workers re-
+// instantiate isolates frequently so the bounded staleness is fine.
+let shellTemplatePromise: Promise<string> | null = null;
+
+async function loadShellTemplate(env: { ASSETS: Fetcher }): Promise<string> {
+  if (!shellTemplatePromise) {
+    shellTemplatePromise = (async () => {
+      const res = await env.ASSETS.fetch(new Request('https://assets.internal/_internal/score-live-shell.html'));
+      if (!res.ok) throw new Error(`score-live shell template missing (status ${res.status})`);
+      return await res.text();
+    })().catch((err) => {
+      shellTemplatePromise = null;
+      throw err;
+    });
+  }
+  return shellTemplatePromise;
+}
+
+/** Test-only — drop the cached template. */
+export function _resetShellTemplateCache(): void {
+  shellTemplatePromise = null;
+}
+
+// ---------------------------------------------------------------------------
+// Scorecard shape — minimal subset the summary renderer reads. Aligned with
+// schema 0.5 emitted by `anc check` (see content/scorecard-schema.md).
+// ---------------------------------------------------------------------------
+
+type CheckResult = {
+  status: 'pass' | 'fail' | 'warn' | 'skip';
+  label: string;
+  group: string;
+  evidence: string | null;
+};
+
+type Scorecard = {
+  schema_version?: string;
+  tool?: { name?: string; binary?: string; version?: string | null };
+  target?: { kind?: string; command?: string; path?: string | null };
+  badge?: { score_pct?: number; eligible?: boolean };
+  results?: CheckResult[];
+  audience?: string | null;
+  audit_profile?: string | null;
+};
+
+// HTML escape + top-issues extraction + principle-number derivation all
+// come from src/shared/scorecard-format.mjs so the Worker + build use the
+// same primitives. `sharedEscHtml` accepts `unknown`; this thin wrapper
+// narrows to string so callsites stay readable.
+function esc(s: string): string {
+  return sharedEscHtml(s);
+}
+
+// principle-num derivation uses the shared `groupToPrincipleNum` (above).
+
+// ---------------------------------------------------------------------------
+// Body builder
+// ---------------------------------------------------------------------------
+
+export type SummaryRenderInput = {
+  scorecard: Scorecard;
+  binary: string;
+  ancVersion: string;
+  toolVersion: string;
+  // 'cache-hit' shows a quiet "(cached)" marker; 'live' does not.
+  freshness: 'cache-hit' | 'live';
+};
+
+/**
+ * Build the HTML body for `/score/live/<binary>`. Reuses the visual rhythm
+ * of `buildScorecardBody` in `scorecards-render.mjs` but trims to the
+ * summary surface: header + score badge + top-3 issues + install-anc CTA.
+ * No full check table; no per-tool meta block.
+ */
+export function buildScoreSummaryBody(input: SummaryRenderInput): string {
+  const { scorecard, binary, ancVersion, toolVersion, freshness } = input;
+  const toolName = scorecard.tool?.name ?? binary;
+  const pct = scorecard.badge?.score_pct ?? 0;
+  const issues = extractTopIssues(scorecard);
+  const freshnessMarker =
+    freshness === 'cache-hit'
+      ? `<span class="live-score-summary__freshness" title="Served from cached scorecard">cached</span>`
+      : `<span class="live-score-summary__freshness live-score-summary__freshness--live" title="Just scored">just scored</span>`;
+
+  const issuesBlock =
+    issues.length === 0
+      ? `<section class="live-score-summary__issues live-score-summary__issues--clean">
+  <h2>Status</h2>
+  <p>No failing or warning checks in this scorecard.</p>
+</section>`
+      : `<section class="live-score-summary__issues">
+  <h2>Top issues</h2>
+  <ul class="issue-list">
+${issues
+  .map((issue) => {
+    const pNum = groupToPrincipleNum(issue.group);
+    const statusClass = issue.status === 'fail' ? 'issue--fail' : 'issue--warn';
+    const groupLink = pNum ? `<a href="/p${pNum}">${esc(issue.group)}</a>` : esc(issue.group);
+    const evidence = issue.evidence ? `<span class="issue__evidence">${esc(issue.evidence)}</span>` : '';
+    return `    <li class="issue ${statusClass}">
+      <span class="issue__status">${esc(issue.status.toUpperCase())}</span>
+      <span class="issue__label">${esc(issue.label)}</span>
+      <span class="issue__group">${groupLink}</span>
+      ${evidence}
+    </li>`;
+  })
+  .join('\n')}
+  </ul>
+</section>`;
+
+  return `<nav class="scorecard-breadcrumb" aria-label="Breadcrumb">
+  <a href="/">&larr; Score another</a>
+</nav>
+<header class="live-score-summary__header">
+  <h1>${esc(toolName)} <span class="live-score-summary__version">${esc(toolVersion || '—')}</span></h1>
+  <p class="live-score-summary__meta">
+    Binary <code>${esc(binary)}</code> · scored by anc ${esc(ancVersion)} · spec ${esc(SPEC_VERSION)} ${freshnessMarker}
+  </p>
+</header>
+<section class="live-score-summary__score">
+  <div class="scorecard-score-badge">
+    <span class="scorecard-score-badge__pct">${pct}%</span>
+    <span class="scorecard-score-badge__label">pass rate</span>
+  </div>
+</section>
+${issuesBlock}
+<section class="live-score-summary__cta">
+  <h2>Get the full picture locally</h2>
+  <p>This is a binary/behavioral summary. <a href="/install">Install <code>anc</code></a> and run <code>anc check .</code> in your project for source-level and project-level checks too.</p>
+  <p class="live-score-summary__cta-aside">Re-score this tool from a fresh paste on the <a href="/">homepage</a>, or browse the curated <a href="/scorecards">leaderboard</a>.</p>
+</section>`;
+}
+
+/**
+ * Build the markdown body for `/score/live/<binary>.md`. Same content
+ * structure as the HTML body — header, score, top issues, CTA — emitted
+ * as plain markdown so agents pasting `Accept: text/markdown` get a
+ * clean document with no HTML escapes. Mirrors the markdown-twin
+ * pattern used elsewhere on the site.
+ */
+export function buildScoreSummaryMarkdown(input: SummaryRenderInput): string {
+  const { scorecard, binary, ancVersion, toolVersion, freshness } = input;
+  const toolName = scorecard.tool?.name ?? binary;
+  const pct = scorecard.badge?.score_pct ?? 0;
+  const issues = extractTopIssues(scorecard);
+  const lines: string[] = [];
+
+  lines.push(`# ${toolName} ${toolVersion ? `(${toolVersion})` : ''}`.trim());
+  lines.push('');
+  lines.push(
+    `Binary \`${binary}\` · scored by anc ${ancVersion} · spec ${SPEC_VERSION} · ${freshness === 'cache-hit' ? 'cached' : 'just scored'}`,
+  );
+  lines.push('');
+  lines.push(`**Score:** ${pct}% pass rate`);
+  lines.push('');
+
+  if (issues.length === 0) {
+    lines.push('## Status');
+    lines.push('');
+    lines.push('No failing or warning checks in this scorecard.');
+    lines.push('');
+  } else {
+    lines.push('## Top issues');
+    lines.push('');
+    // Shared with the static /score/<tool>.md check table — single source
+    // of truth for the row format in src/shared/scorecard-format.mjs.
+    // Absolute baseUrl because /score/live/<binary>.md is consumed by
+    // agents via Accept negotiation and must self-resolve cross-origin
+    // (no absolutifyMarkdownLinks pass like the static .md twins get).
+    for (const row of formatCheckTableMarkdownLines(issues, { baseUrl: 'https://anc.dev' })) {
+      lines.push(row);
+    }
+    lines.push('');
+  }
+
+  lines.push('## Get the full picture locally');
+  lines.push('');
+  lines.push(
+    'This is a binary/behavioral summary. [Install `anc`](https://anc.dev/install) and run `anc check .` in your project for source-level and project-level checks too.',
+  );
+  lines.push('');
+  lines.push(
+    'Re-score this tool from a fresh paste on the [homepage](https://anc.dev/), or browse the curated [leaderboard](https://anc.dev/scorecards).',
+  );
+  lines.push('');
+
+  return lines.join('\n');
+}
+
+// ---------------------------------------------------------------------------
+// Page renderer + Worker-route handler
+// ---------------------------------------------------------------------------
+
+// Same CSP shape applyHeaders sets on static pages — mirrored here because
+// /score/live/<binary> bypasses the static asset pipeline. Three Turnstile
+// directives (script-src, frame-src, connect-src) are kept even though
+// this page itself doesn't load Turnstile, because the share-URL surface
+// links back to the homepage form, and a uniform CSP across HTML responses
+// is easier to assert than per-page exceptions.
+const LIVE_SCORE_CSP =
+  "default-src 'self'; " +
+  "script-src 'self' 'unsafe-inline' https://challenges.cloudflare.com; " +
+  'frame-src https://challenges.cloudflare.com; ' +
+  "connect-src 'self' https://challenges.cloudflare.com; " +
+  "img-src 'self' data:; " +
+  "style-src 'self' 'unsafe-inline'; " +
+  "font-src 'self'; " +
+  "base-uri 'self'; " +
+  "form-action 'self'; " +
+  "object-src 'none'; " +
+  "frame-ancestors 'self'";
+
+const HTML_HEADERS = {
+  'Content-Type': 'text/html; charset=utf-8',
+  // 5 minutes at the edge with stale-while-revalidate matches the cache
+  // policy elsewhere on the site. A re-score within the TTL still hits the
+  // cache; after eviction, the page 404s until the next scoring event.
+  'Cache-Control': 'public, max-age=300, s-maxage=300, stale-while-revalidate=60',
+  'X-Robots-Tag': 'noindex',
+  'Content-Security-Policy': LIVE_SCORE_CSP,
+} as const;
+
+const MARKDOWN_HEADERS = {
+  'Content-Type': 'text/markdown; charset=utf-8',
+  'Cache-Control': 'public, max-age=300, s-maxage=300, stale-while-revalidate=60',
+  'X-Robots-Tag': 'noindex',
+} as const;
+
+function substituteShell(
+  template: string,
+  fields: { title: string; description: string; canonicalPath: string; body: string },
+): string {
+  // Single-pass substitution — placeholders are well-known + author-fixed,
+  // not user input, so no escape-injection risk on the placeholder side.
+  // The `body` slot is built from escaped scorecard fields above.
+  return template
+    .replaceAll('{{TITLE}}', esc(fields.title))
+    .replaceAll('{{DESCRIPTION}}', esc(fields.description))
+    .replaceAll('{{CANONICAL_PATH}}', esc(fields.canonicalPath))
+    .replaceAll('{{BODY}}', fields.body);
+}
+
+type LiveScoreEnv = CacheEnv & { ASSETS: Fetcher };
+
+/** Strict slug shape — matches registry-name validation in scorecards.mjs. */
+const BINARY_SLUG_RE = /^[a-z0-9][a-z0-9-]{0,63}$/;
+
+export type LiveScorePathMatch = {
+  binary: string;
+  /** True for `/score/live/<binary>.md`, false for the canonical HTML path. */
+  isMarkdown: boolean;
+};
+
+/**
+ * Extract `<binary>` from `/score/live/<binary>` or `/score/live/<binary>.md`.
+ * Returns null when the path doesn't match OR the slug fails the strict
+ * shape check (no uppercase, no dots, no slashes, no leading hyphen,
+ * bounded length). Tight regex matters here — this is the user-input
+ * boundary for an R2 key lookup.
+ *
+ * URL pattern nests under the existing `/score/` namespace so the URL
+ * hierarchy reads as: `/score/<tool>` (curated static) and
+ * `/score/live/<binary>` (dynamic live-scored). The string "live" is
+ * reserved as a registry name in scorecards.mjs so a future curated tool
+ * named "live" can't collide.
+ *
+ * The two surfaces share routing because every HTML page on the site
+ * carries a markdown twin (site-wide invariant). The handler picks the
+ * response format from the suffix; Accept-header negotiation kicks in for
+ * the suffix-less path.
+ *
+ * Returns just the binary string for caller convenience when the .md
+ * distinction doesn't matter; use parseLiveScorePathMatch for the
+ * structured form.
+ */
+export function parseLiveScorePath(pathname: string): string | null {
+  return parseLiveScorePathMatch(pathname)?.binary ?? null;
+}
+
+export function parseLiveScorePathMatch(pathname: string): LiveScorePathMatch | null {
+  const mdMatch = pathname.match(/^\/score\/live\/([^/]+)\.md$/);
+  if (mdMatch) {
+    return BINARY_SLUG_RE.test(mdMatch[1]) ? { binary: mdMatch[1], isMarkdown: true } : null;
+  }
+  const m = pathname.match(/^\/score\/live\/([^/]+)$/);
+  if (!m) return null;
+  return BINARY_SLUG_RE.test(m[1]) ? { binary: m[1], isMarkdown: false } : null;
+}
+
+/**
+ * Handle a GET `/score/live/<binary>` (or `.md`) request. Returns:
+ *   - 200 HTML / markdown with the rendered summary if R2 has a cached scorecard
+ *   - 404 HTML / markdown if the cache is empty (no recent paste-and-score
+ *     for this binary, or the 7-day lifecycle reaped the entry)
+ *   - 405 for non-GET/HEAD methods
+ *
+ * Format selection:
+ *   - `.md` suffix → markdown
+ *   - no suffix + `Accept: text/markdown` (q-weighted) → markdown
+ *   - otherwise → HTML
+ */
+export async function handleLiveScorePage(request: Request, env: LiveScoreEnv): Promise<Response> {
+  if (request.method !== 'GET' && request.method !== 'HEAD') {
+    return new Response('method not allowed', { status: 405, headers: { 'content-type': 'text/plain' } });
+  }
+
+  const url = new URL(request.url);
+  const match = parseLiveScorePathMatch(url.pathname);
+  if (!match) {
+    return renderNotFound(env, '(invalid)', false);
+  }
+
+  const { binary } = match;
+  // Content negotiation: explicit `.md` suffix always wins; otherwise
+  // honor the Accept header (defaults to HTML when ambiguous, same as
+  // the rest of the site).
+  const wantMarkdown = match.isMarkdown || (!match.isMarkdown && detectPreference(request) === 'markdown');
+
+  // The DO's cache write uses spec.binary (the parser-derived binary).
+  // The handler's share_url uses the same. So a user never visits a
+  // /score/live/<alias> URL we'd need to redirect — the URL we emit IS
+  // the cache key. Aliases (e.g., the static /score/rg → /score/ripgrep
+  // redirect) live on the curated-static side and don't apply here.
+  const cached = await cacheGet(env, cacheKeyFor(binary, SPEC_VERSION));
+  if (!cached) {
+    return renderNotFound(env, binary, wantMarkdown);
+  }
+
+  const renderInput: SummaryRenderInput = {
+    scorecard: cached.scorecard as Scorecard,
+    binary,
+    ancVersion: cached.anc_version,
+    toolVersion: cached.tool_version,
+    freshness: 'cache-hit',
+  };
+
+  if (wantMarkdown) {
+    const md = buildScoreSummaryMarkdown(renderInput);
+    return new Response(md, { status: 200, headers: MARKDOWN_HEADERS });
+  }
+
+  const body = buildScoreSummaryBody(renderInput);
+
+  const toolName = (cached.scorecard as Scorecard).tool?.name ?? binary;
+  const pct = (cached.scorecard as Scorecard).badge?.score_pct ?? 0;
+  const title = `${toolName} — Agent-Native Live Score`;
+  const description = `${toolName} scored ${pct}% against the agent-native CLI standard (anc ${cached.anc_version}, spec ${SPEC_VERSION}). Live-scored binary, not a curated audit.`;
+  const canonicalPath = `/score/live/${binary}`;
+
+  let template: string;
+  try {
+    template = await loadShellTemplate(env);
+  } catch (err) {
+    return new Response(`shell template unavailable: ${err instanceof Error ? err.message : String(err)}`, {
+      status: 500,
+      headers: { 'content-type': 'text/plain' },
+    });
+  }
+
+  const html = substituteShell(template, { title, description, canonicalPath, body });
+  return new Response(html, { status: 200, headers: HTML_HEADERS });
+}
+
+async function renderNotFound(env: LiveScoreEnv, binary: string, wantMarkdown: boolean): Promise<Response> {
+  if (wantMarkdown) {
+    const lines = [
+      `# No live score for \`${binary}\` yet`,
+      '',
+      'Live-score URLs surface a cached scorecard from a recent paste-and-score run. If no one has scored this binary in the last 7 days, the cache is empty.',
+      '',
+      '## Score it now',
+      '',
+      'Paste the tool name, install command, or GitHub URL on the [homepage](https://anc.dev/) to score it. Once it scores, the share URL works.',
+      '',
+      `Or [install \`anc\`](https://anc.dev/install) and run \`anc check ${binary}\` locally.`,
+      '',
+    ];
+    return new Response(lines.join('\n'), { status: 404, headers: MARKDOWN_HEADERS });
+  }
+
+  const body = `<header class="live-score-summary__header">
+  <h1>No live score for <code>${esc(binary)}</code> yet</h1>
+  <p class="live-score-summary__meta">Live-score URLs surface a cached scorecard from a recent paste-and-score run. If no one has scored this binary in the last 7 days, the cache is empty.</p>
+</header>
+<section class="live-score-summary__cta">
+  <h2>Score it now</h2>
+  <p>Paste the tool name, install command, or GitHub URL on the <a href="/">homepage</a> to score it. Once it scores, the share URL works.</p>
+  <p>Or <a href="/install">install <code>anc</code></a> and run <code>anc check ${esc(binary)}</code> locally.</p>
+</section>`;
+
+  const title = `Not yet scored — anc.dev`;
+  const description = `No cached live scorecard for ${binary}. Score it on the homepage or run anc check locally.`;
+  const canonicalPath = `/score/live/${binary}`;
+
+  let template: string;
+  try {
+    template = await loadShellTemplate(env);
+  } catch (err) {
+    return new Response(`shell template unavailable: ${err instanceof Error ? err.message : String(err)}`, {
+      status: 500,
+      headers: { 'content-type': 'text/plain' },
+    });
+  }
+
+  const html = substituteShell(template, { title, description, canonicalPath, body });
+  return new Response(html, { status: 404, headers: HTML_HEADERS });
+}
+
+// Statically referenced so unused-export linters keep these alive.
+void SITE_SPEC_VERSION;
diff --git a/src/worker/score/telemetry.ts b/src/worker/score/telemetry.ts
new file mode 100644
index 0000000..bc2f4b9
--- /dev/null
+++ b/src/worker/score/telemetry.ts
@@ -0,0 +1,95 @@
+// Workers Analytics Engine telemetry helper for /api/score.
+//
+// One writeDataPoint per request, emitted from handler.ts in the same
+// try/finally that emits the `score.tier` console log line. The console
+// log is the manual-recovery fallback when AE is down; this helper is
+// the queryable surface.
+//
+// Field schema is contractual — `tests/score-telemetry.test.ts` pins
+// every blob/double/index slot so a future reorder breaks loudly
+// rather than silently invalidating saved AE SQL queries. AE rejects
+// values silently rather than throwing on cardinality limits, so this
+// wrapper enforces shape at the boundary and ALSO enforces the
+// graceful-degradation discipline (same posture as `kill-switch.ts`):
+// any AE write error logs under scope `score.telemetry.write_failed`
+// and is swallowed, so an AE outage cannot block a `/api/score`
+// response.
+//
+// Slot map (canonical — DO NOT reorder without updating
+// `docs/runbooks/live-scoring-analytics.md` AND the
+// `tests/score-telemetry.test.ts` regression pin):
+//
+//   blob1   input kind     "registry" | "install-command" | "github-url" |
+//                          "slug-miss" | "invalid"
+//   blob2   pm             "npm" | "cargo-binstall" | "pip" | "uv" | "bun" |
+//                          "go" | "brew" | "direct" | "git-clone" | null
+//   blob3   error code     null on success, else ScoreError.code
+//   blob4   freshness      "live" | "cache-hit" | "registry-hit" | null
+//   blob5   resolved step  DiscoveryResult.resolved_step on live;
+//                          "registry" on curated hits; null otherwise
+//
+//   double1 total ms       Worker handler wall clock
+//   double2 install ms     sandbox exec install duration; null on
+//                          non-live paths (registry hit, cache hit,
+//                          pre-install error)
+//   double3 anc check ms   sandbox exec anc-check duration; null on
+//                          non-live paths
+//   double4 status         HTTP status the response carried
+//
+//   index1  tool name OR slug; null on validation errors. Cardinality
+//           target ≤10k; AE samples high-cardinality indexes
+//           automatically.
+
+import type { ResolvedStep } from './discover-binary';
+import type { ScoreError } from './response-shape';
+
+// The AE binding type ships in @cloudflare/workers-types; declared
+// locally as a structural shape so the worker module compiles in
+// environments where the binding type isn't loaded and tests can
+// pass a hand-rolled stub. The writeDataPoint signature mirrors the
+// Cloudflare runtime's contract.
+export interface AnalyticsEngineDataset {
+  writeDataPoint(event: { blobs?: (string | null)[]; doubles?: (number | null)[]; indexes?: string[] }): void;
+}
+
+export type ScoreTelemetryEnv = {
+  SCORE_TELEMETRY: AnalyticsEngineDataset;
+};
+
+export type PmTag = 'npm' | 'cargo-binstall' | 'pip' | 'uv' | 'bun' | 'go' | 'brew' | 'direct' | 'git-clone';
+
+export type InputKindTag = 'registry' | 'install-command' | 'github-url' | 'slug-miss' | 'invalid';
+
+export type FreshnessTag = 'live' | 'cache-hit' | 'registry-hit';
+
+export type ScoreEventFields = {
+  input_kind: InputKindTag | null;
+  pm: PmTag | null;
+  error_code: ScoreError['code'] | null;
+  freshness: FreshnessTag | null;
+  resolved_step: ResolvedStep | 'registry' | null;
+  total_ms: number;
+  install_ms: number | null;
+  anc_check_ms: number | null;
+  response_status: number;
+  // tool name OR slug — whichever the input resolved to. Null when
+  // input validation rejected before any name was knowable.
+  tool: string | null;
+};
+
+export function recordScoreEvent(env: ScoreTelemetryEnv, fields: ScoreEventFields): void {
+  try {
+    env.SCORE_TELEMETRY.writeDataPoint({
+      blobs: [fields.input_kind, fields.pm, fields.error_code, fields.freshness, fields.resolved_step],
+      doubles: [fields.total_ms, fields.install_ms, fields.anc_check_ms, fields.response_status],
+      indexes: fields.tool ? [fields.tool] : [],
+    });
+  } catch (err) {
+    console.log(
+      JSON.stringify({
+        scope: 'score.telemetry.write_failed',
+        error: err instanceof Error ? err.message : String(err),
+      }),
+    );
+  }
+}
diff --git a/src/worker/score/turnstile.ts b/src/worker/score/turnstile.ts
new file mode 100644
index 0000000..f484173
--- /dev/null
+++ b/src/worker/score/turnstile.ts
@@ -0,0 +1,62 @@
+// Cloudflare Turnstile siteverify wrapper.
+//
+// Plan U5 (docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md
+// "Cost ceiling and abuse mitigation" step 1 + U5 handler step 4): the U8
+// form submits a `turnstile_token` in the POST body. The Worker POSTs it
+// (with the secret) to challenges.cloudflare.com/turnstile/v0/siteverify.
+// Failure → 400 with `turnstile_failed`. Success → caller may set the
+// session cookie.
+//
+// Invisible-mode (no checkbox) + lazy-load are U8 client-side decisions;
+// this module only validates whatever token the client sends.
+
+const SITEVERIFY_URL = 'https://challenges.cloudflare.com/turnstile/v0/siteverify';
+
+export type TurnstileEnv = {
+  TURNSTILE_SECRET?: string;
+};
+
+export type VerifyResult =
+  | { ok: true }
+  | { ok: false; reason: 'misconfigured' | 'missing_token' | 'rejected' | 'transport_error' };
+
+export class TurnstileConfigError extends Error {
+  constructor() {
+    super('TURNSTILE_SECRET not configured');
+    this.name = 'TurnstileConfigError';
+  }
+}
+
+export type VerifyOpts = {
+  /** Injectable for tests; defaults to globalThis.fetch. */
+  fetcher?: typeof fetch;
+  /** Remote IP from the request (CF-Connecting-IP); optional but Cloudflare-recommended. */
+  remoteIp?: string;
+};
+
+export async function verifyTurnstile(
+  env: TurnstileEnv,
+  token: string | null | undefined,
+  opts: VerifyOpts = {},
+): Promise<VerifyResult> {
+  if (!env.TURNSTILE_SECRET) return { ok: false, reason: 'misconfigured' };
+  if (!token) return { ok: false, reason: 'missing_token' };
+
+  const fetcher = opts.fetcher ?? globalThis.fetch.bind(globalThis);
+  const body = new FormData();
+  body.set('secret', env.TURNSTILE_SECRET);
+  body.set('response', token);
+  if (opts.remoteIp) body.set('remoteip', opts.remoteIp);
+
+  let res: Response;
+  try {
+    res = await fetcher(SITEVERIFY_URL, { method: 'POST', body });
+  } catch {
+    return { ok: false, reason: 'transport_error' };
+  }
+  if (!res.ok) return { ok: false, reason: 'transport_error' };
+
+  const parsed = (await res.json().catch(() => null)) as { success?: boolean } | null;
+  if (!parsed || parsed.success !== true) return { ok: false, reason: 'rejected' };
+  return { ok: true };
+}
diff --git a/src/worker/score/validate.ts b/src/worker/score/validate.ts
index c264fec..62a7de3 100644
--- a/src/worker/score/validate.ts
+++ b/src/worker/score/validate.ts
@@ -2,10 +2,31 @@
 // kinds (slug | install-command | github-url | unknown) the rest of the
 // scoring pipeline consumes.
 //
-// Plan U4 (docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md
-// lines 1086-1091). URL validation rules per the rust-url-validation
-// learning referenced in the plan (HTTPS only, github.com host only,
-// homoglyph guard via literal hostname comparison after URL parsing).
+// URL validation rules: HTTPS only, github.com host only, homoglyph
+// guard via literal hostname comparison after URL parsing.
+//
+// Accepted shapes beyond the obvious `https://github.com/owner/repo`:
+//
+//   - http:// is upgraded to https:// silently. The user pasted a tool
+//     URL; the protocol is the wrong scheme but the intent is clear.
+//     Substring attacks (`http://github.com.evil.com/...`) still fail
+//     `non_github_host` because the host check is exact-match against
+//     the URL parser's hostname field — the upgrade only changes the
+//     scheme.
+//   - `owner/repo` shorthand. `tobi/qmd` (no protocol, no github.com
+//     prefix) routes to the same github-url path as
+//     `https://github.com/tobi/qmd`. Strict per-GitHub username + repo
+//     name rules (no leading hyphens, no spaces, capped lengths).
+//   - Branch URLs. `https://github.com/<owner>/<repo>/tree/<branch>`
+//     and `…/tree/<branch>/<subpath>` accept; the github-url variant
+//     carries an optional `branch` field. Strict branch-name regex
+//     plus an explicit `..` reject (defense in depth — the strict
+//     regex already excludes shell metacharacters but the path-
+//     traversal pattern is worth a separate guard for clarity).
+//
+// The `non_https_url` + `invalid_url_path` error codes stay in the union
+// so they fire for genuinely-malformed inputs (e.g., `javascript:` or a
+// repo URL with `/releases/download/...` instead of `/tree/...`).
 
 import type { ParsedInstall } from './parse-install';
 import { parseInstallCommand } from './parse-install';
@@ -21,17 +42,63 @@ export type ValidationError =
 export type ValidatedInput =
   | { kind: 'slug'; slug: string }
   | { kind: 'install-command'; spec: ParsedInstall }
-  | { kind: 'github-url'; owner: string; repo: string }
+  | { kind: 'github-url'; owner: string; repo: string; branch?: string }
   | { kind: 'unknown'; error: ValidationError };
 
 const SLUG_RE = /^[a-z0-9-]+$/;
 const PM_PREFIX_RE = /^(brew|cargo|bun|uv|pip|pip3|pipx|npm|yarn|pnpm|go)\s/;
-// Anchored: only repo-root URLs (with optional .git suffix and optional
-// trailing slash). Branch paths like `/tree/main` are rejected.
+// "Looks like an install command for a package manager we don't support."
+// These prefixes are routed to `unparseable_install_command` (not
+// `unrecognized_input`) so the homepage form can render a precise
+// "this kind of install isn't supported" copy with the supported set
+// listed, rather than a generic "not a recognized tool" line. Each
+// entry is a literal head token; `apt-get` is hyphenated so the regex
+// pins the whole word boundary.
+const UNSUPPORTED_PM_PREFIX_RE =
+  /^(apt-get|apt|dnf|yum|zypper|pacman|snap|flatpak|port|choco|scoop|winget|gem|composer|emerge)\s/;
+// Anchored: repo-root URL (with optional .git suffix and optional
+// trailing slash). Branch URLs (`/tree/<branch>[/<subpath>]`) match a
+// separate pattern below — kept separate so the repo-root case stays
+// the obvious-by-eye shape and branch handling doesn't muddy it.
 const GITHUB_URL_RE = /^https:\/\/github\.com\/([^/]+)\/([^/]+?)(?:\.git)?\/?$/;
+// Branch URL: `…/<owner>/<repo>/tree/<branch>[/<subpath>]`. Owner and
+// repo segments captured for re-validation via the same character
+// classes the shorthand uses. Branch capture is greedy because a
+// branch name MAY contain `/` (e.g., `feature/new-thing`). The optional
+// `/<subpath>` tail is allowed but discarded — users frequently paste
+// `…/tree/main/docs/architecture.md`; the scoring contract is
+// repo+branch granularity, not file granularity. If subpath-aware
+// scoring ever lands, capture this tail then.
+const GITHUB_BRANCH_URL_RE = /^https:\/\/github\.com\/([^/]+)\/([^/]+?)\/tree\/(.+)$/;
 
-// Mirrors the shape U1 emits at dist/registry-index.json. The Worker
-// imports the actual file at request time; here we declare the contract.
+// GitHub username rules: 1-39 chars, alphanumeric + hyphen, no leading
+// hyphen. Org names follow the same rule. Mirrors GitHub's own
+// validation so a regex pass here is the same gate the user would hit
+// at github.com.
+const OWNER_RE = /^[A-Za-z0-9](?:[A-Za-z0-9-]{0,38})$/;
+// GitHub repo name rules: alphanumeric, `.`, `_`, `-`. The literal
+// strings `.` and `..` are reserved by GitHub itself, so we reject
+// them explicitly. Cap at 100 chars (GitHub's documented limit is
+// effectively unbounded but anything past 100 is almost certainly a
+// paste mistake).
+const REPO_RE = /^[A-Za-z0-9._-]{1,100}$/;
+// `owner/repo` shorthand: exactly two segments split by a single `/`.
+// Substring attacks (`../etc/passwd`, `foo/bar/baz`, leading slashes)
+// fail this regex before the owner+repo character classes run.
+const SHORTHAND_RE = /^([^/\s]+)\/([^/\s]+)$/;
+
+// Branch-name shape lock: alphanumeric, dot, underscore, slash, hyphen.
+// Length capped at 250 chars (git itself enforces 255 for refs minus
+// some overhead; 250 stays inside that and is plenty for any real
+// branch). Path-traversal pattern (`..`) and shell metacharacters
+// (space, `;`, `$`, `(`, `)`, backtick, `&`, `|`, `<`, `>`, quotes)
+// are excluded by the character class; the explicit `..` guard in
+// validBranchName() catches the path-traversal case clearly.
+const BRANCH_NAME_RE = /^[A-Za-z0-9._/-]{1,250}$/;
+
+// Mirrors the shape the build emits at dist/registry-index.json. The
+// Worker imports the actual file at request time; here we declare the
+// contract.
 export type RegistryIndexShape = {
   by_slug: Record<string, unknown>;
   by_owner_repo: Record<string, unknown>;
@@ -55,12 +122,54 @@ export function validateInput(raw: string, registryIndex: RegistryIndexShape): V
     return { kind: 'unknown', error: parsed.error };
   }
 
-  // URL paste: must be parseable, https-only, github.com only, repo-root only.
-  if (trimmed.includes('://')) return classifyUrl(trimmed);
+  // Looks-like-install-command for an unsupported package manager:
+  // route directly to `unparseable_install_command` so the homepage form
+  // surfaces the "PM isn't supported" copy with the supported set listed,
+  // rather than the generic "not a recognized tool" line. Without this
+  // branch, `apt-get install foo` would fall through to
+  // `unrecognized_input` and read the same as random text.
+  if (UNSUPPORTED_PM_PREFIX_RE.test(trimmed)) {
+    return { kind: 'unknown', error: 'unparseable_install_command' };
+  }
+
+  // URL paste: must be parseable, github.com only, repo-root OR branch.
+  // http:// is silently upgraded to https:// before routing — the user's
+  // intent is unambiguous and the protocol is the only thing wrong.
+  // Genuinely malformed protocols (`javascript:`, `htp:`, etc.) still
+  // fail through the URL-parse path or the protocol check.
+  if (trimmed.includes('://')) {
+    const upgraded = maybeUpgradeHttp(trimmed);
+    return classifyUrl(upgraded);
+  }
+
+  // `owner/repo` shorthand. Tried AFTER slug + install-command checks so
+  // an installed-by-name lookup wins over an accidental shorthand match,
+  // and BEFORE the unknown bounce so two-segment github-shaped inputs
+  // route to the github-url path. The regex is strict on segment shape;
+  // path traversal (`../foo`), triple-slash (`foo/bar/baz`), leading
+  // hyphens (`-bad/repo`), and whitespace all bounce as
+  // unrecognized_input here rather than producing a malformed github-url.
+  const shorthand = trimmed.match(SHORTHAND_RE);
+  if (shorthand && OWNER_RE.test(shorthand[1]) && REPO_RE.test(shorthand[2])) {
+    return { kind: 'github-url', owner: shorthand[1], repo: shorthand[2] };
+  }
 
   return { kind: 'unknown', error: 'unrecognized_input' };
 }
 
+// Silent http:// → https:// upgrade. Only the `http://` prefix is
+// rewritten (case-insensitive); `https://`, `javascript:`, `data:`,
+// `htp:`, etc. pass through untouched and fall to the normal URL-parse
+// path. The substring is matched at position 0 so a string like
+// `random text http://x` doesn't trigger the upgrade — only a paste
+// that actually STARTS with http:// gets the silent fix.
+function maybeUpgradeHttp(input: string): string {
+  if (/^http:\/\//i.test(input)) {
+    return `https://${input.slice('http://'.length)}`;
+  }
+  return input;
+}
+
 function classifyUrl(url: string): ValidatedInput {
   let parsed: URL;
   try {
@@ -72,10 +181,72 @@ function classifyUrl(url: string): ValidatedInput {
   // The URL parser IDN-encodes non-ASCII hostnames into Punycode
   // (`xn--*`). Literal comparison against `github.com` rejects homoglyph
   // spoofs (e.g. Cyrillic 'і' in `gіthub.com` becomes `xn--gthub-cph.com`)
-  // AND the standard non-github suffixes.
+  // AND substring-attack hosts like `github.com.evil.com` (whose parsed
+  // hostname is the full `github.com.evil.com`, not `github.com`).
   if (parsed.hostname !== 'github.com') return { kind: 'unknown', error: 'non_github_host' };
 
-  const m = url.match(GITHUB_URL_RE);
-  if (!m) return { kind: 'unknown', error: 'invalid_url_path' };
-  return { kind: 'github-url', owner: m[1], repo: m[2] };
+  // Match against the parser-normalized href so case-variant pastes
+  // (`HTTP://GitHub.com/...`) succeed: the parser lowercases scheme +
+  // host but preserves path case, so `normalized` is always
+  // `https://github.com/<owner>/<repo>[/...]`.
+  const normalized = parsed.href;
+  // Try repo-root URL first (the common case).
+  const root = normalized.match(GITHUB_URL_RE);
+  if (root) return { kind: 'github-url', owner: root[1], repo: stripGitSuffix(root[2]) };
+
+  // Branch URL: `…/<owner>/<repo>/tree/<branch>[/<subpath>]`. The
+  // branch capture is greedy through the rest of the URL; we split it
+  // again to peel a leading `<branch>` segment off any trailing
+  // `/<subpath>` so a paste like `…/tree/main/docs/file.md` resolves
+  // to branch=`main` (subpath discarded). Branch may itself contain
+  // `/` (e.g. `feature/new-thing`), but the standard GitHub URL shape
+  // doesn't disambiguate `feature/new-thing/<no-subpath>` from
+  // `feature/new-thing/some-subpath` — we accept the FULL tail as the
+  // branch name in that case and let the DO's git clone bounce if the
+  // branch doesn't exist. This matches GitHub's own URL semantics
+  // (which also can't tell the difference without a server round-trip)
+  // and biases toward "let the user score what they pasted".
+  const branchUrl = normalized.match(GITHUB_BRANCH_URL_RE);
+  if (branchUrl) {
+    const owner = branchUrl[1];
+    const repo = stripGitSuffix(branchUrl[2]);
+    const tail = branchUrl[3];
+    const branch = peelBranch(tail);
+    if (!branch || !validBranchName(branch)) {
+      return { kind: 'unknown', error: 'invalid_url_path' };
+    }
+    return { kind: 'github-url', owner, repo, branch };
+  }
+
+  return { kind: 'unknown', error: 'invalid_url_path' };
+}
+
+// Peel a branch name off a `/tree/<...>` tail, taking the FULL tail as
+// the branch. The URL parser already URL-decoded the path, so `%2F`
+// inputs land here as literal `/`. The validBranchName() guard then
+// rejects path-traversal patterns (`..`) before the branch reaches the
+// DO. Empty tail returns null so `…/tree/` (no branch) bounces.
+function peelBranch(tail: string): string | null {
+  // Trim a trailing slash so `…/tree/main/` matches `main`.
+  const cleaned = tail.replace(/\/+$/, '');
+  if (!cleaned) return null;
+  return cleaned;
+}
+
+// Branch-name shape lock applied after URL parsing. Pure-character-class
+// check plus an explicit `..` reject so path-traversal stands out in
+// the code (the regex already excludes `..` by way of dot AND adjacent
+// dot being a non-repeating run, but the explicit guard documents the
+// security property loudly and protects against a future regex relax
+// that would silently re-open the gap).
+export function validBranchName(branch: string): boolean {
+  if (!BRANCH_NAME_RE.test(branch)) return false;
+  if (branch.includes('..')) return false;
+  if (branch.startsWith('/') || branch.endsWith('/')) return false;
+  if (branch.startsWith('.') || branch.endsWith('.')) return false;
+  return true;
+}
+
+function stripGitSuffix(repo: string): string {
+  return repo.replace(/\.git$/, '');
 }
diff --git a/src/worker/spec-version.gen.ts b/src/worker/spec-version.gen.ts
new file mode 100644
index 0000000..9d8c5c8
--- /dev/null
+++ b/src/worker/spec-version.gen.ts
@@ -0,0 +1,13 @@
+// GENERATED by src/build/00-spec-version-gen.mjs — do NOT edit.
+// Re-run `bun run build` to regenerate. The drift check in
+// tests/spec-version-gen.test.ts fails CI if this file is out of date.
+//
+// SPEC_VERSION       — from src/data/spec/VERSION (the standard the
+//                      Worker scores against).
+// SITE_SPEC_VERSION  — from content/principles/VERSION (the principle
+//                      copy this site renders).
+// CHECKER_URL        — production live-scoring surface; moves with anc.dev.
+
+export const SPEC_VERSION = '0.4.0';
+export const SITE_SPEC_VERSION = '0.4.0';
+export const CHECKER_URL = 'https://anc.dev/score';
diff --git a/styles/brand/FillerAdjectives.yml b/styles/brand/FillerAdjectives.yml
index a0d077a..c1082f5 100644
--- a/styles/brand/FillerAdjectives.yml
+++ b/styles/brand/FillerAdjectives.yml
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: MIT OR Apache-2.0
-# Source: BRAND.md § Universal anti-patterns — No filler adjectives.
+# Source: BRAND.md § Universal anti-patterns: No filler adjectives.
 extends: existence
 message: "Filler adjective: '%s'. Concrete before abstract; let the noun do the work."
 link: https://github.com/brettdavies/agentnative/blob/main/BRAND.md#universal-anti-patterns
diff --git a/styles/brand/HedgeWords.yml b/styles/brand/HedgeWords.yml
index 07d7319..60215a7 100644
--- a/styles/brand/HedgeWords.yml
+++ b/styles/brand/HedgeWords.yml
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: MIT OR Apache-2.0
-# Source: BRAND.md § Universal anti-patterns — No hedge words.
+# Source: BRAND.md § Universal anti-patterns: No hedge words.
 extends: existence
 message: "Hedge word: '%s'. MUST is the contract; SHOULD is the contract. Hedges undercut both."
 link: https://github.com/brettdavies/agentnative/blob/main/BRAND.md#universal-anti-patterns
diff --git a/styles/brand/MarketingRegister.yml b/styles/brand/MarketingRegister.yml
index 4f6d400..f34da1a 100644
--- a/styles/brand/MarketingRegister.yml
+++ b/styles/brand/MarketingRegister.yml
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: MIT OR Apache-2.0
-# Source: BRAND.md § Universal anti-patterns — No marketing register.
+# Source: BRAND.md § Universal anti-patterns: No marketing register.
 extends: existence
 message: "Marketing register: '%s'. The standard speaks in third person about contracts, not first person about beliefs."
 link: https://github.com/brettdavies/agentnative/blob/main/BRAND.md#universal-anti-patterns
diff --git a/styles/brand/README.md b/styles/brand/README.md
index dc3298a..c9bf992 100644
--- a/styles/brand/README.md
+++ b/styles/brand/README.md
@@ -6,7 +6,7 @@ regex; this README is the human-readable companion.
 
 ## brand.FillerAdjectives
 
-*Source: BRAND.md § Universal anti-patterns — No filler adjectives.*
+*Source: BRAND.md § Universal anti-patterns: No filler adjectives.*
 
 **Message:** Filler adjective: `<matched text>`. Concrete before abstract; let the noun do the work.
 
@@ -24,7 +24,7 @@ regex; this README is the human-readable companion.
 
 ## brand.HedgeWords
 
-*Source: BRAND.md § Universal anti-patterns — No hedge words.*
+*Source: BRAND.md § Universal anti-patterns: No hedge words.*
 
 **Message:** Hedge word: `<matched text>`. MUST is the contract; SHOULD is the contract. Hedges undercut both.
 
@@ -44,7 +44,7 @@ regex; this README is the human-readable companion.
 
 ## brand.MarketingRegister
 
-*Source: BRAND.md § Universal anti-patterns — No marketing register.*
+*Source: BRAND.md § Universal anti-patterns: No marketing register.*
 
 **Message:** Marketing register: `<matched text>`. The standard speaks in third person about contracts, not first person
 about beliefs.
@@ -65,4 +65,4 @@ about beliefs.
 - `we recommend`
 - `we think`
 
-<!-- generated by scripts/generate-pack-readme.mjs from styles/brand/*.yml — do not edit by hand -->
+<!-- generated by scripts/generate-pack-readme.mjs from styles/brand/*.yml. Do not edit by hand. -->
diff --git a/styles/config/vocabularies/site/accept.txt b/styles/config/vocabularies/site/accept.txt
index 9c4ff6a..557655a 100644
--- a/styles/config/vocabularies/site/accept.txt
+++ b/styles/config/vocabularies/site/accept.txt
@@ -10,6 +10,7 @@ Cheng
 Citeable
 Cloudflare
 Conda
+Crowdsourced
 Desaturate
 Dialogs
 Dinamo
@@ -47,6 +48,8 @@ Plex
 Polypane
 Pseudocode
 Qamarjafari
+Quantiles
+Queryable
 Reframe
 Resync
 Roboto
@@ -85,6 +88,7 @@ citeable
 cmake
 codeblock
 coverage_summary
+cron
 culori
 cutover
 denylist
@@ -215,6 +219,7 @@ viewport
 viewports
 vw
 watchexec
+watchpoints
 webfont
 wordmark
 wrangler
@@ -227,3 +232,21 @@ yazi
 yq
 zoomable
 zoxide
+unauth
+client_secret
+client_id
+non_identity
+CPython
+Trixie
+aider
+litellm
+scipy
+pyperclip
+pycparser
+backports
+hashmap
+misconfigured
+sdist
+siteverify
+uncached
+unmetered
diff --git a/styles/site/BannedAesthetics.yml b/styles/site/BannedAesthetics.yml
index 788f181..249e7dc 100644
--- a/styles/site/BannedAesthetics.yml
+++ b/styles/site/BannedAesthetics.yml
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT OR Apache-2.0
-# Source: PRODUCT.md § Aesthetic Direction — banned visual patterns.
+# Source: PRODUCT.md § Aesthetic Direction: banned visual patterns.
 extends: existence
-message: "Banned aesthetic pattern '%s' — anti-reference per the site's design context. Choose a different pattern or describe the underlying need without the loaded label."
+message: "Banned aesthetic pattern '%s': anti-reference per the site's design context; choose a different pattern or describe the underlying need without the loaded label."
 link: https://github.com/brettdavies/agentnative-site/blob/main/styles/site/README.md
 level: warning
 ignorecase: true
diff --git a/styles/site/BannedFonts.yml b/styles/site/BannedFonts.yml
index 1b7dc79..6fa24f0 100644
--- a/styles/site/BannedFonts.yml
+++ b/styles/site/BannedFonts.yml
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: MIT OR Apache-2.0
-# Source: PRODUCT.md § Aesthetic Direction — second-favorite font reflex.
+# Source: PRODUCT.md § Aesthetic Direction: second-favorite font reflex.
 extends: existence
-message: "Banned font name '%s' — second-favorite font reflex per the impeccable font-selection procedure. Cite by category, not by name."
+message: "Banned font name '%s': second-favorite font reflex per the impeccable font-selection procedure. Cite by category, not by name."
 link: https://github.com/brettdavies/agentnative-site/blob/main/styles/site/README.md
 level: warning
 ignorecase: true
diff --git a/styles/site/README.md b/styles/site/README.md
index cfa34ba..53a1b6f 100644
--- a/styles/site/README.md
+++ b/styles/site/README.md
@@ -6,10 +6,10 @@ regex; this README is the human-readable companion.
 
 ## site.BannedAesthetics
 
-*Source: PRODUCT.md § Aesthetic Direction — banned visual patterns.*
+*Source: PRODUCT.md § Aesthetic Direction: banned visual patterns.*
 
-**Message:** Banned aesthetic pattern `<matched text>` — anti-reference per the site's design context. Choose a
-different pattern or describe the underlying need without the loaded label.
+**Message:** Banned aesthetic pattern `<matched text>`: anti-reference per the site's design context; choose a different
+pattern or describe the underlying need without the loaded label.
 
 **Rationale:** <https://github.com/brettdavies/agentnative-site/blob/main/styles/site/README.md>
 
@@ -27,10 +27,10 @@ different pattern or describe the underlying need without the loaded label.
 
 ## site.BannedFonts
 
-*Source: PRODUCT.md § Aesthetic Direction — second-favorite font reflex.*
+*Source: PRODUCT.md § Aesthetic Direction: second-favorite font reflex.*
 
-**Message:** Banned font name `<matched text>` — second-favorite font reflex per the impeccable font-selection
-procedure. Cite by category, not by name.
+**Message:** Banned font name `<matched text>`: second-favorite font reflex per the impeccable font-selection procedure.
+Cite by category, not by name.
 
 **Rationale:** <https://github.com/brettdavies/agentnative-site/blob/main/styles/site/README.md>
 
@@ -48,4 +48,4 @@ procedure. Cite by category, not by name.
 - `Plus Jakarta Sans`
 - `Space Grotesk`
 
-<!-- generated by scripts/generate-pack-readme.mjs from styles/site/*.yml — do not edit by hand -->
+<!-- generated by scripts/generate-pack-readme.mjs from styles/site/*.yml. Do not edit by hand. -->
diff --git a/tests/bun-setup.ts b/tests/bun-setup.ts
new file mode 100644
index 0000000..15e88fa
--- /dev/null
+++ b/tests/bun-setup.ts
@@ -0,0 +1,40 @@
+// Bun-test setup — registered via bunfig.toml `[test].preload`.
+//
+// Why this exists: `@cloudflare/containers` (transitive dep of
+// `@cloudflare/sandbox`, imported by `src/worker/score/do.ts`) does a
+// top-level `import { DurableObject, WorkerEntrypoint } from 'cloudflare:workers'`
+// in its CJS bundle. `cloudflare:workers` is a workerd-runtime-only virtual
+// module — Bun can't resolve it and the import throws at module load,
+// taking down every test that transitively imports the Worker entry
+// (worker.test.ts, score-handler.test.ts via shared fixtures, etc.).
+//
+// This shim provides no-op `DurableObject` and `WorkerEntrypoint` classes
+// so the import succeeds. Bun-side tests that exercise pure logic (handler
+// orchestration, content negotiation, header policy) keep working.
+//
+// Tests that need real DO behavior (state persistence, alarms, fetch
+// dispatch through the binding) must use a different test runtime
+// (workerd via @cloudflare/vitest-pool-workers) or run as E2E against a
+// deployed Worker. The shim catches the "module loads" floor; it doesn't
+// pretend DurableObject semantics work.
+
+import { plugin } from 'bun';
+
+plugin({
+  name: 'cloudflare-workers-shim',
+  setup(build) {
+    build.module('cloudflare:workers', () => ({
+      contents: [
+        'export class DurableObject {',
+        '  constructor(ctx, env) { this.ctx = ctx; this.env = env; }',
+        '}',
+        'export class WorkerEntrypoint {',
+        '  constructor(ctx, env) { this.ctx = ctx; this.env = env; }',
+        '}',
+        // env wrapper sentinel — some CF helpers probe for this at module load.
+        'export const env = undefined;',
+      ].join('\n'),
+      loader: 'js',
+    }));
+  },
+});
diff --git a/tests/dockerfile-sandbox.test.ts b/tests/dockerfile-sandbox.test.ts
index d403f6a..e02f08e 100644
--- a/tests/dockerfile-sandbox.test.ts
+++ b/tests/dockerfile-sandbox.test.ts
@@ -1,9 +1,13 @@
-// Static shape assertions for the live-scoring sandbox image (plan U2).
+// Static shape assertions for the live-scoring sandbox image. Base
+// reworks: 2026-05-18 moved to debian-trixie-slim / glibc; 2026-05-19
+// moved to python:3.12-slim-trixie to satisfy aider-chat and similar
+// tools that require Python <3.13.
 //
 // The image-size + smoke-test verifications require a working Docker
 // daemon (CI doesn't have one) and live in docker/sandbox/README.md as
 // manual steps. This test covers the parts that survive without docker:
-// SHA-pin discipline, no-toolchains invariant, and pm coverage.
+// SHA-pin discipline, no-toolchains invariant, pm coverage, and the
+// brew-omitted rationale.
 
 import { describe, expect, test } from 'bun:test';
 import { readFile } from 'node:fs/promises';
@@ -26,42 +30,80 @@ describe('docker/sandbox/Dockerfile — SHA-pin discipline', () => {
     }
   });
 
+  test('base images are CF Sandbox SDK 0.9.x (glibc) + python:3.12-slim-trixie', async () => {
+    const df = await loadDockerfile();
+    // The 0.9.4 (non-suffixed) tag is the glibc base; -musl/-python/etc are
+    // siblings. Mismatching the variant against the apt/binary install
+    // table (e.g. picking -musl while installing libstdc++6) breaks the
+    // sandbox-server runtime contract.
+    expect(df).toMatch(/cloudflare\/sandbox:0\.9\.\d+@sha256:/);
+    expect(df).not.toMatch(/cloudflare\/sandbox:0\.9\.\d+-musl@/);
+    // 2026-05-19: swapped from `debian:trixie-slim` to
+    // `python:3.12-slim-trixie` so the system Python is 3.12 (satisfies
+    // the <3.13 constraint that broad swaths of the PyPI ecosystem
+    // declare, e.g. aider-chat per Aider-AI/aider#3037). The Trixie
+    // variant keeps the same Debian userland we already validated.
+    expect(df).toMatch(/python:3\.12-slim-trixie@sha256:/);
+    expect(df).not.toMatch(/^FROM docker\.io\/library\/debian:/m);
+  });
+
   test('cargo-binstall download verifies via sha256sum -c', async () => {
     const df = await loadDockerfile();
     expect(df).toMatch(/cargo-binstall.*\.tgz/);
-    // The verification line: echo '<sha>  /tmp/cb.tgz' | sha256sum -c -
     expect(df).toMatch(/echo '[0-9a-f]{64} {2}\/tmp\/cb\.tgz' \| sha256sum -c -/);
   });
 
-  test('agentnative musl tarball download verifies via sha256sum -c', async () => {
+  test('agentnative gnu tarball download verifies via sha256sum -c', async () => {
     const df = await loadDockerfile();
-    expect(df).toMatch(/agentnative-x86_64-unknown-linux-musl\.tar\.gz/);
+    // The rework switched anc from the musl static-pie binary to the
+    // gnu variant matched to the new glibc base image. The half-bumped
+    // state (URL pointing at gnu but sha256 still the musl one) would
+    // fail at build time loudly, but the dual-match guard below catches
+    // a quieter half-bump where someone updates the URL fragment but
+    // leaves the .tar.gz filename unchanged.
+    expect(df).toMatch(/agentnative-x86_64-unknown-linux-gnu\.tar\.gz/);
+    expect(df).not.toMatch(/agentnative-x86_64-unknown-linux-musl\.tar\.gz/);
     expect(df).toMatch(/echo '[0-9a-f]{64} {2}\/tmp\/anc\.tgz' \| sha256sum -c -/);
   });
 
-  test('pinned anc release matches v0.3.1 (the one whose sha256 is in the file)', async () => {
+  test('bun zip download verifies via sha256sum -c', async () => {
+    const df = await loadDockerfile();
+    // Bun is added in the rework as part of the native-PM pivot. Pinned
+    // for the same reason cargo-binstall and anc are pinned: prevent
+    // upstream re-tag attacks from silently changing what we ship.
+    expect(df).toMatch(/bun-linux-x64\.zip/);
+    expect(df).toMatch(/echo '[0-9a-f]{64} {2}\/tmp\/bun\.zip' \| sha256sum -c -/);
+  });
+
+  test('uv tarball download verifies via sha256sum -c', async () => {
+    const df = await loadDockerfile();
+    expect(df).toMatch(/uv-x86_64-unknown-linux-gnu\.tar\.gz/);
+    expect(df).toMatch(/echo '[0-9a-f]{64} {2}\/tmp\/uv\.tgz' \| sha256sum -c -/);
+  });
+
+  test('pinned anc release matches v0.4.0 (the one whose sha256 is in the file)', async () => {
     const df = await loadDockerfile();
-    // The plan's musl HARD BLOCKER was satisfied by v0.3.1; later bumps need
-    // the URL AND the sha256 line updated together. This guard catches the
-    // half-bumped state where one was changed and the other wasn't.
-    expect(df).toContain('agentnative-cli/releases/download/v0.3.1/');
+    expect(df).toContain('agentnative-cli/releases/download/v0.4.0/');
   });
 });
 
 describe('docker/sandbox/Dockerfile — no-toolchains invariant (Premise #2)', () => {
-  test('apk add does NOT install rust, cargo (the compiler), or build-base', async () => {
-    const df = await loadDockerfile();
-    const apkLines = df.match(/^RUN apk add[^\n]*(\n[ ]+[^\n]*)*/gm) || [];
-    expect(apkLines.length).toBeGreaterThan(0);
-    for (const block of apkLines) {
-      // Block-level: tokenize to whole words so "rustup-init" or "go" pass while
-      // "rust" alone fails.
-      const tokens = block.split(/\s+/).filter((t) => t && !t.startsWith('-'));
-      // Forbidden compiler/toolchain packages.
-      const forbidden = ['rust', 'rustup', 'cargo', 'build-base', 'gcc', 'g++', 'clang', 'make'];
+  test('apt install does NOT pull in compilers or build toolchains', async () => {
+    const df = await loadDockerfile();
+    const aptBlocks = df.match(/^RUN apt-get[^\n]*(\n[ ]+[^\n]*)*/gm) || [];
+    expect(aptBlocks.length).toBeGreaterThan(0);
+    // Forbidden packages — anything that lets a user input build C/Rust/Go
+    // from source. golang-go ships the go toolchain (we rely on `go install`
+    // pulling precompiled module artifacts in practice; modules that build
+    // from source bounce at sandbox install time). The forbidden set is the
+    // CGO / native-extension surface that would let an attacker stretch
+    // exec time past the 60 s budget by triggering long compiles.
+    const forbidden = ['build-essential', 'gcc', 'g++', 'clang', 'make', 'cmake', 'rustc', 'cargo', 'rustup'];
+    for (const block of aptBlocks) {
+      const tokens = block.split(/\s+/).filter((t) => t && !t.startsWith('-') && !t.startsWith('&&'));
       for (const f of forbidden) {
-        expect({ apkBlock: block.slice(0, 80), token: f, present: tokens.includes(f) }).toEqual({
-          apkBlock: block.slice(0, 80),
+        expect({ aptBlock: block.slice(0, 80), token: f, present: tokens.includes(f) }).toEqual({
+          aptBlock: block.slice(0, 80),
           token: f,
           present: false,
         });
@@ -69,28 +111,50 @@ describe('docker/sandbox/Dockerfile — no-toolchains invariant (Premise #2)', (
     }
   });
 
-  test('go is present (runtime needed for `go install` of precompiled modules)', async () => {
+  test('upstream Go runtime (cgo-enabled) is installed from go.dev/dl', async () => {
     const df = await loadDockerfile();
-    expect(df).toMatch(/apk add[^\n]*(\n[ ]+[^\n]*)*\bgo\b/);
+    // Debian's golang-go is built with CGO_ENABLED=0 — that silently
+    // disables GODEBUG=netdns=cgo and makes go install hang on CF
+    // Containers' IPv6 path. Upstream Go ships with cgo enabled.
+    expect(df).toMatch(/go\.dev\/dl\/go[0-9.]+\.linux-amd64\.tar\.gz/);
+    expect(df).toMatch(/echo '[0-9a-f]{64} {2}\/tmp\/go\.tgz' \| sha256sum -c -/);
   });
 });
 
 describe('docker/sandbox/Dockerfile — package manager coverage', () => {
-  test('cargo-binstall is installed (cargo-bins/cargo-binstall release)', async () => {
+  test('cargo-binstall is installed (gnu variant)', async () => {
     const df = await loadDockerfile();
     expect(df).toContain('cargo-bins/cargo-binstall/releases/download/');
+    expect(df).toContain('cargo-binstall-x86_64-unknown-linux-gnu.full.tgz');
     // cargo-binstall uses `-V` for binary version (its `--version` is reserved
     // for specifying the package version to install — different semantic).
     expect(df).toMatch(/cargo-binstall -V/);
   });
 
-  test('all four U4-supported pms have a runtime in the image: cargo-binstall, pip, npm, go', async () => {
+  test('all six supported pms have a runtime in the image: cargo-binstall, pip, npm, go, bun, uv', async () => {
     const df = await loadDockerfile();
-    // py3-pip / npm / go come from apk; cargo-binstall comes from the curl step.
-    expect(df).toMatch(/\bpy3-pip\b/);
+    // Python + pip come from the python:3.12-slim-trixie FROM line
+    // (2026-05-19), not from an apt python3-pip install. The base image
+    // provides /usr/local/bin/pip and /usr/local/bin/python3 ahead of
+    // /usr/bin on PATH.
+    expect(df).toMatch(/python:3\.12-slim-trixie/);
     expect(df).toMatch(/\bnpm\b/);
-    expect(df).toMatch(/\bgo\b/);
+    expect(df).toMatch(/go\.dev\/dl\/go[0-9.]+\.linux-amd64/);
     expect(df).toMatch(/cargo-binstall/);
+    expect(df).toMatch(/bun-linux-x64\.zip/);
+    expect(df).toMatch(/uv-x86_64-unknown-linux-gnu\.tar\.gz/);
+  });
+
+  test('archive extraction tools cover .tar.gz / .tar.xz / .tar.bz2 / .zip (Bug N)', async () => {
+    const df = await loadDockerfile();
+    // The direct-PM install path now dispatches extraction on URL
+    // extension (sandbox-exec.ts directInstallCommand). The image must
+    // carry the matching userspace tools; missing xz-utils would surface
+    // as cryptic `tar: xz utility not present` failures on csvlens-style
+    // .tar.xz releases.
+    expect(df).toMatch(/\bbzip2\b/);
+    expect(df).toMatch(/\bunzip\b/);
+    expect(df).toMatch(/\bxz-utils\b/);
   });
 
   test('brew is intentionally absent and the rationale is documented in-file', async () => {
@@ -104,11 +168,11 @@ describe('docker/sandbox/Dockerfile — package manager coverage', () => {
       .join('\n');
     expect(code).not.toMatch(/\bbrew install\b/);
     expect(code).not.toMatch(/\b(linuxbrew|homebrew)\b/i);
-    // Positive: comment block names brew + bounce class explicitly so a
-    // future maintainer doesn't silently re-add brew without revisiting
-    // the chain_resolved_install_failed CTA work in U8.
-    expect(df).toMatch(/brew is intentionally OMITTED/);
-    expect(df).toMatch(/chain_resolved_install_failed/);
+    // Rationale + the bounce contract token (pm=brew_only) must be
+    // documented so a future maintainer doesn't reinstate brew without
+    // revisiting the discovery-fallback in do.ts.
+    expect(df).toMatch(/brew is NOT installed/);
+    expect(df).toMatch(/brew_only/);
   });
 });
 
@@ -129,6 +193,83 @@ describe('docker/sandbox/Dockerfile — sandbox runtime', () => {
     expect(envPath).toContain('/usr/local/bin');
     expect(envPath).toContain('/usr/local/cargo/bin');
     expect(envPath).toContain('/usr/local/go/bin');
-    expect(envPath).toContain('/root/.local/bin'); // pip user-installs
+  });
+
+  test('every PM redirects global installs to /usr/local/bin (single dest)', async () => {
+    // Consistency invariant: the post-install `which <binary>` gate in
+    // sandbox-exec.ts looks on PATH; centralising every PM at
+    // /usr/local/bin avoids the per-PM "where does this binary land"
+    // game. BUN_INSTALL/bin = /usr/local/bin; UV_TOOL_BIN_DIR =
+    // /usr/local/bin; cargo-binstall --install-path + GOBIN in the
+    // sandbox-exec install commands also target /usr/local/bin.
+    const df = await loadDockerfile();
+    expect(df).toMatch(/^ENV BUN_INSTALL=\/usr\/local$/m);
+    expect(df).toMatch(/^ENV UV_TOOL_BIN_DIR=\/usr\/local\/bin$/m);
+  });
+
+  test('Go uses cgo resolver to honor /etc/gai.conf IPv4 precedence', async () => {
+    // CF Containers IPv6 outbound is unreliable. /etc/gai.conf is
+    // patched to prefer IPv4 for glibc's getaddrinfo. Go's pure-Go
+    // resolver bypasses gai.conf; GODEBUG=netdns=cgo forces Go to use
+    // getaddrinfo and honor the precedence. Requires Go built with
+    // CGO (upstream tarball, not Debian's CGO_ENABLED=0 build).
+    const df = await loadDockerfile();
+    expect(df).toMatch(/^ENV GODEBUG=netdns=cgo$/m);
+    expect(df).toMatch(/sed -i .* \/etc\/gai\.conf/);
+  });
+
+  test('declares at least one EXPOSE so wrangler dev --local accepts the container binding', async () => {
+    // deep-check.yml only schedules containers when wrangler can see an
+    // EXPOSE line. Port 3000 is reserved by the CF Sandbox SDK's internal
+    // Bun server, so any placeholder must avoid it. 8080 is the chosen
+    // placeholder.
+    const df = await loadDockerfile();
+    const exposeLines = df.split('\n').filter((l) => /^EXPOSE\s+\d+/.test(l));
+    expect(exposeLines.length).toBeGreaterThanOrEqual(1);
+    expect(df).not.toMatch(/^EXPOSE\s+3000\b/m);
+  });
+});
+
+describe('docker/sandbox/Dockerfile — supply-chain release-delay gate', () => {
+  // The image bakes a 7-day "package must have been published at least
+  // this long ago" gate for uv installs. Mirrors the maintainer's shell
+  // convention for the same defense. A malicious fresh-publish (or a
+  // legitimate package taken over and re-published) cannot reach our
+  // sandbox until it has been on PyPI for at least 7 days.
+  //
+  // uv accepts a relative duration natively (UV_EXCLUDE_NEWER), so the
+  // gate is set at image build time as an ENV var. pip's equivalent
+  // (PIP_UPLOADED_PRIOR_TO) requires an absolute timestamp and is
+  // therefore computed at exec time in sandbox-exec.ts (see the
+  // companion test in tests/score-do.test.ts).
+
+  test('ENV UV_EXCLUDE_NEWER is set to "7 days"', async () => {
+    const df = await loadDockerfile();
+    expect(df).toMatch(/^ENV UV_EXCLUDE_NEWER="7 days"$/m);
+  });
+
+  test('UV_EXCLUDE_NEWER is set AFTER uv is installed so future uv-using RUN steps inherit it', async () => {
+    // Order matters: if UV_EXCLUDE_NEWER were declared above the uv
+    // install step, any in-image `uv` invocation during build would
+    // start enforcing the 7-day gate. Setting it after the uv install
+    // leaves the image-build uv calls (uv --version, etc.) gate-free
+    // while ensuring runtime uv invocations honor it.
+    const df = await loadDockerfile();
+    const uvInstallIdx = df.search(/uv --version/);
+    const uvExcludeNewerIdx = df.search(/^ENV UV_EXCLUDE_NEWER=/m);
+    expect(uvInstallIdx).toBeGreaterThan(0);
+    expect(uvExcludeNewerIdx).toBeGreaterThan(uvInstallIdx);
+  });
+
+  test('ENV PIP_DISABLE_PIP_VERSION_CHECK=1 suppresses pip upgrade notice in evidence/stderr', async () => {
+    // Without this env var, every `pip install <pkg>` in the sandbox
+    // writes a multi-line "A new release of pip is available" notice to
+    // stderr, which pollutes the scorecard evidence field and the
+    // bounce-panel stderr block. Baked at image build time so future
+    // builds carry it intrinsically; sandbox-exec.ts also prepends it
+    // inline at exec time so the currently-deployed image gets the
+    // suppression before the next image rebuild lands.
+    const df = await loadDockerfile();
+    expect(df).toMatch(/^ENV PIP_DISABLE_PIP_VERSION_CHECK=1$/m);
   });
 });
diff --git a/tests/e2e/agents.e2e.ts b/tests/e2e/agents.e2e.ts
index a65b928..a8c4263 100644
--- a/tests/e2e/agents.e2e.ts
+++ b/tests/e2e/agents.e2e.ts
@@ -81,7 +81,7 @@ test.describe('llms.txt + llms-full.txt — live', () => {
     expect(body).toMatch(/^>\s+/m);
     expect(body).toContain('## Principles');
     const bullets = body.match(/^-\s+\[[^\]]+\]\([^)]*\/p\d+\.md\)$/gm) ?? [];
-    expect(bullets.length).toBe(7);
+    expect(bullets.length).toBe(8);
     // Sub-pages (check, about) present under ## Pages.
     expect(body).toContain('## Pages');
     const pageLinks = body.match(/^-\s+\[[^\]]+\]\([^)]*\/(check|about)\.md\)$/gm) ?? [];
@@ -91,7 +91,7 @@ test.describe('llms.txt + llms-full.txt — live', () => {
     expect(body).toContain('## Scorecards');
   });
 
-  test('/llms-full.txt is served in a single fetch with A5 delimiters', async ({ request }) => {
+  test('/llms-full.txt is served in a single fetch with concatenation delimiters', async ({ request }) => {
     const res = await request.get(`${BASE}/llms-full.txt`);
     expect(res.status()).toBe(200);
     const body = await res.text();
diff --git a/tests/e2e/flows.e2e.ts b/tests/e2e/flows.e2e.ts
index 76a3a03..c3b3246 100644
--- a/tests/e2e/flows.e2e.ts
+++ b/tests/e2e/flows.e2e.ts
@@ -6,11 +6,11 @@ import { expect, test } from '@playwright/test';
 import { checkA11y, injectAxe } from 'axe-playwright';
 
 test.describe('cold HN land → browse principles → theme dark → reload still dark', () => {
-  test('landing on / shows hero + principle listing with 7 entries', async ({ page }) => {
+  test('landing on / shows hero + principle listing with 8 entries', async ({ page }) => {
     await page.goto('/');
     await expect(page.locator('.hero__title')).toBeVisible();
     const entries = page.locator('.principle-entry');
-    await expect(entries).toHaveCount(7);
+    await expect(entries).toHaveCount(8);
   });
 
   test('clicking a principle entry navigates to its detail page', async ({ page }) => {
@@ -143,10 +143,10 @@ test.describe('code-copy + anchor-copy', () => {
 });
 
 test.describe('principle listing', () => {
-  test('index page has a principle listing with 7 entries', async ({ page }) => {
+  test('index page has a principle listing with 8 entries', async ({ page }) => {
     await page.goto('/');
     const entries = page.locator('.principle-entry');
-    await expect(entries).toHaveCount(7);
+    await expect(entries).toHaveCount(8);
   });
 
   test('principle entry links to its detail page', async ({ page }) => {
diff --git a/tests/e2e/homepage-score-live.e2e.ts b/tests/e2e/homepage-score-live.e2e.ts
new file mode 100644
index 0000000..c97436a
--- /dev/null
+++ b/tests/e2e/homepage-score-live.e2e.ts
@@ -0,0 +1,136 @@
+// Live-network e2e for /api/score against the staging Worker.
+//
+// Opt-in suite (project: homepage-score-live). Excluded from the default
+// `bun run test:e2e` run because it hits the real CF staging Worker, the
+// real Sandbox container, real Turnstile siteverify (with the always-
+// passes test secret), and real R2. Use to validate a staging deploy
+// before merging or to triage a regression that mocks can't reproduce.
+//
+// Run with:
+//   ANC_STAGING_BASE_URL=https://agentnative-site-staging.brettdavies.workers.dev \
+//     bun x playwright test --project=homepage-score-live
+//
+// The staging Worker is gated by Cloudflare Access. Set
+// ANC_STAGING_ACCESS_CLIENT_ID + ANC_STAGING_ACCESS_CLIENT_SECRET to a
+// service-token pair if running headless (CI / cron); otherwise interactive
+// auth works in a real browser via the Access challenge.
+//
+// Turnstile note: staging uses CF's always-passes test SECRET, so a
+// turnstile_token of "x" passes siteverify. This test posts a real token
+// because the homepage script lazy-loads the real CF Turnstile widget;
+// the always-passes test SITEKEY makes that widget hand back a valid
+// (test-shape) token without a user interaction.
+
+import { expect, test } from '@playwright/test';
+
+const STAGING_BASE = process.env.ANC_STAGING_BASE_URL;
+
+test.skip(
+  !STAGING_BASE,
+  'ANC_STAGING_BASE_URL not set — opt-in live-sandbox suite. Set it to the staging Worker URL to run.',
+);
+
+const ACCESS_HEADERS: Record<string, string> = {};
+if (process.env.ANC_STAGING_ACCESS_CLIENT_ID && process.env.ANC_STAGING_ACCESS_CLIENT_SECRET) {
+  ACCESS_HEADERS['CF-Access-Client-Id'] = process.env.ANC_STAGING_ACCESS_CLIENT_ID;
+  ACCESS_HEADERS['CF-Access-Client-Secret'] = process.env.ANC_STAGING_ACCESS_CLIENT_SECRET;
+}
+
+test.describe('staging /api/score — live round-trip', () => {
+  test('POST {input: "ripgrep"} returns curated registry_hit with response triad', async ({ request }) => {
+    const res = await request.post(`${STAGING_BASE}/api/score`, {
+      headers: { 'content-type': 'application/json', ...ACCESS_HEADERS },
+      data: JSON.stringify({ input: 'ripgrep', turnstile_token: 'x' }),
+    });
+    expect(res.status()).toBe(200);
+    const body = (await res.json()) as {
+      scorecard: { kind?: string; scorecard_url?: string };
+      spec_version: string;
+      site_spec_version: string;
+      anc_version: string;
+      checker_url: string;
+    };
+    expect(body.scorecard.kind).toBe('registry_hit');
+    expect(body.scorecard.scorecard_url).toBe('/score/ripgrep');
+    expect(body.spec_version).toMatch(/^\d+\.\d+\.\d+/);
+    expect(body.site_spec_version).toMatch(/^\d+\.\d+\.\d+/);
+    expect(body.anc_version).toMatch(/^\d+\.\d+\.\d+/);
+    expect(body.checker_url).toContain('anc.dev');
+  });
+
+  test('POST {input: "cargo install ripgrep"} hits cache OR live path, gets share_url', async ({ request }) => {
+    test.setTimeout(120_000); // live path may take ~30-60s on cold cache
+    const res = await request.post(`${STAGING_BASE}/api/score`, {
+      headers: { 'content-type': 'application/json', ...ACCESS_HEADERS },
+      data: JSON.stringify({ input: 'cargo install ripgrep', turnstile_token: 'x' }),
+    });
+    expect(res.status()).toBe(200);
+    const body = (await res.json()) as { share_url?: string; scorecard: unknown };
+    expect(body.share_url).toBe('/score/live/ripgrep');
+    expect(body.scorecard).toBeTruthy();
+  });
+
+  test('GET /score/live/ripgrep renders the cached scorecard as HTML', async ({ request }) => {
+    test.setTimeout(60_000);
+    // Prime the cache first via a POST (cached or live).
+    await request.post(`${STAGING_BASE}/api/score`, {
+      headers: { 'content-type': 'application/json', ...ACCESS_HEADERS },
+      data: JSON.stringify({ input: 'cargo install ripgrep', turnstile_token: 'x' }),
+    });
+    const res = await request.get(`${STAGING_BASE}/score/live/ripgrep`, { headers: ACCESS_HEADERS });
+    expect(res.status()).toBe(200);
+    expect(res.headers()['content-type']).toContain('text/html');
+    const html = await res.text();
+    expect(html).toContain('ripgrep');
+    expect(html).toContain('pass rate');
+    expect(html).toContain('href="/install"');
+  });
+
+  test('GET /score/live/ripgrep.md returns markdown twin', async ({ request }) => {
+    test.setTimeout(60_000);
+    await request.post(`${STAGING_BASE}/api/score`, {
+      headers: { 'content-type': 'application/json', ...ACCESS_HEADERS },
+      data: JSON.stringify({ input: 'cargo install ripgrep', turnstile_token: 'x' }),
+    });
+    const res = await request.get(`${STAGING_BASE}/score/live/ripgrep.md`, { headers: ACCESS_HEADERS });
+    expect(res.status()).toBe(200);
+    expect(res.headers()['content-type']).toContain('text/markdown');
+    const md = await res.text();
+    expect(md).toContain('# ripgrep');
+    expect(md).toContain('**Score:**');
+  });
+
+  test('GET /score/live/ripgrep.html → 301 to /score/live/ripgrep', async ({ request }) => {
+    const res = await request.get(`${STAGING_BASE}/score/live/ripgrep.html`, {
+      headers: ACCESS_HEADERS,
+      maxRedirects: 0,
+    });
+    expect(res.status()).toBe(301);
+    expect(res.headers().location).toBe('/score/live/ripgrep');
+  });
+
+  test('GET /score/live/unknown-binary-xyz → 404 HTML', async ({ request }) => {
+    const res = await request.get(`${STAGING_BASE}/score/live/unknown-binary-xyz`, { headers: ACCESS_HEADERS });
+    expect(res.status()).toBe(404);
+    expect(res.headers()['content-type']).toContain('text/html');
+  });
+});
+
+test.describe('staging homepage form — real Turnstile + real /api/score', () => {
+  test('full submit flow: paste registry slug → redirect to /score/ripgrep', async ({ page }) => {
+    test.setTimeout(60_000);
+    // Cloudflare Access challenge happens on first navigation. If the
+    // session is already authenticated, the page loads directly. Service-
+    // token headers are scoped to API requests; full-browser nav uses
+    // interactive Access auth or a pre-warmed cookie.
+    await page.goto(`${STAGING_BASE}/`);
+    await expect(page.locator('#live-score-input')).toBeVisible({ timeout: 30_000 });
+
+    await page.locator('#live-score-input').fill('ripgrep');
+    await page.locator('[data-live-score-submit]').click();
+
+    // ripgrep is curated → registry_hit → redirect to /score/ripgrep.
+    await page.waitForURL(/\/score\/ripgrep/, { timeout: 30_000 });
+    await expect(page.locator('h1')).toContainText(/ripgrep/i);
+  });
+});
diff --git a/tests/e2e/homepage-score.e2e.ts b/tests/e2e/homepage-score.e2e.ts
new file mode 100644
index 0000000..424f856
--- /dev/null
+++ b/tests/e2e/homepage-score.e2e.ts
@@ -0,0 +1,624 @@
+// Playwright e2e: homepage live-scoring form.
+//
+// Default chromium project. Mocks `/api/score` via page.route() so the
+// suite runs offline + deterministically. Asserts:
+//   - happy path: lazy-loaded Turnstile, 2 s theater floor, redirect to share_url
+//   - lazy-load regression: Turnstile NOT requested without form interaction
+//   - registry_hit redirect
+//   - invalid + non-GitHub URL + 429 + Turnstile-fail inline errors
+//   - three bounce panels (chain_no_resolve, chain_resolved_install_failed,
+//     chain_resolved_no_binary_produced)
+//   - CSP regression: script-src, frame-src, connect-src all contain
+//     challenges.cloudflare.com on the homepage response header
+//   - markdown-twin silence: /index.md must NOT mention live-score,
+//     turnstile, challenges.cloudflare.com, or /api/score
+//   - /score/live/<binary>.html → 301 redirect to /score/live/<binary>
+//     (URL pattern consistency with the rest of the site)
+//   - red-team: no token leak in URL on redirect, sitekey absent in
+//     prod-style env (the form disables itself)
+
+import { expect, test } from '@playwright/test';
+
+const SCORECARD_SAMPLE = {
+  schema_version: '0.5',
+  tool: { name: 'ripgrep', binary: 'rg', version: '14.1.0' },
+  target: { kind: 'command', command: 'rg' },
+  badge: { score_pct: 92, eligible: true },
+  audience: 'agent-optimized',
+  audit_profile: null,
+  results: [
+    {
+      status: 'fail',
+      label: 'exits 0 on missing required flag',
+      group: 'P4',
+      evidence: 'expected non-zero exit, got 0',
+    },
+    { status: 'pass', label: 'streams stdout', group: 'P1', evidence: 'OK' },
+  ],
+};
+
+// Mock helper — every test that hits the form needs Turnstile siteverify
+// to pass (we mock the script entirely) and `/api/score` to respond with
+// the test's chosen shape.
+async function mockTurnstileAndScore(
+  page: import('@playwright/test').Page,
+  scorePayload: { status: number; body: Record<string, unknown> },
+): Promise<{ turnstileRequested: () => boolean; scoreCalls: () => number }> {
+  let turnstileRequested = false;
+  let scoreCalls = 0;
+  // The real Turnstile script lazy-loads on first interaction. We replace
+  // it with a tiny stub that synthesizes window.turnstile.{render,execute,reset}
+  // so the form's submit flow gets a token without a network round-trip
+  // and without dependency on the real CF infrastructure.
+  await page.route('https://challenges.cloudflare.com/turnstile/v0/api.js**', async (route) => {
+    turnstileRequested = true;
+    await route.fulfill({
+      contentType: 'application/javascript',
+      body: `
+        window.turnstile = {
+          render(_el, opts) {
+            // Synchronously deliver a fake token to mirror the real callback shape.
+            // Use a timeout so the call stack matches real Turnstile (callback
+            // fires async after execute()).
+            window.__lastTurnstileCallback = opts.callback;
+            return 'fake-widget-id';
+          },
+          execute(_id) {
+            const cb = window.__lastTurnstileCallback;
+            if (cb) setTimeout(() => cb('fake-token'), 10);
+          },
+          reset() {},
+          remove() {},
+        };
+      `,
+    });
+  });
+  await page.route('**/api/score', async (route) => {
+    scoreCalls += 1;
+    await route.fulfill({
+      status: scorePayload.status,
+      contentType: 'application/json; charset=utf-8',
+      body: JSON.stringify(scorePayload.body),
+    });
+  });
+  return {
+    turnstileRequested: () => turnstileRequested,
+    scoreCalls: () => scoreCalls,
+  };
+}
+
+test.describe('homepage live-scoring form — happy path', () => {
+  test('paste registry slug → 2 s theater → redirect to share_url', async ({ page }) => {
+    const observer = await mockTurnstileAndScore(page, {
+      status: 200,
+      body: {
+        scorecard: SCORECARD_SAMPLE,
+        spec_version: '0.4.0',
+        site_spec_version: '0.4.0',
+        anc_version: '0.3.1',
+        checker_url: 'https://anc.dev/score',
+        share_url: '/score/live/ripgrep',
+      },
+    });
+
+    await page.goto('/');
+
+    // Wait for the form to be ready (live-score.js is deferred).
+    const input = page.locator('#live-score-input');
+    await expect(input).toBeVisible();
+
+    // Capture the start time and submit; the 2 s theater is enforced
+    // client-side via Promise.all([fetch, setTimeout(2000)]).
+    const start = Date.now();
+    await input.fill('ripgrep');
+    await page.locator('[data-live-score-submit]').click();
+
+    // After submit, the page should redirect to share_url.
+    await page.waitForURL('**/score/live/ripgrep', { timeout: 10_000 });
+    const elapsed = Date.now() - start;
+    expect(elapsed).toBeGreaterThanOrEqual(1900); // 2 s minus a small jitter tolerance
+
+    // Sanity: Turnstile script was loaded after interaction, /api/score
+    // was called exactly once.
+    expect(observer.turnstileRequested()).toBe(true);
+    expect(observer.scoreCalls()).toBe(1);
+  });
+
+  test('registry_hit response redirects to scorecard_url', async ({ page }) => {
+    const observer = await mockTurnstileAndScore(page, {
+      status: 200,
+      body: {
+        scorecard: { kind: 'registry_hit', tool: { name: 'ripgrep' }, scorecard_url: '/score/ripgrep' },
+        spec_version: '0.4.0',
+        anc_version: '0.3.1',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('ripgrep');
+    await page.locator('[data-live-score-submit]').click();
+
+    await page.waitForURL('**/score/ripgrep', { timeout: 10_000 });
+    expect(observer.scoreCalls()).toBe(1);
+  });
+
+  test('curated registry_hit shows "Curated · N% pass rate" reward before redirect', async ({ page }) => {
+    // The registry_hit envelope now carries score_pct so the homepage form
+    // can render a small "you found one of ours" reward inline before the
+    // redirect. The reward shows for the remainder of the 2 s theater
+    // floor, then the page navigates.
+    await mockTurnstileAndScore(page, {
+      status: 200,
+      body: {
+        scorecard: {
+          kind: 'registry_hit',
+          tool: { name: 'bat' },
+          scorecard_url: '/score/bat',
+          score_pct: 78,
+        },
+        spec_version: '0.4.0',
+        anc_version: '0.3.1',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('cargo install bat');
+    await page.locator('[data-live-score-submit]').click();
+
+    // Reward text appears in the status slot (with the --curated class
+    // applied for the accent-color identity cue) BEFORE the redirect.
+    const status = page.locator('[data-live-score-status]');
+    await expect(status).toHaveClass(/live-score__status--curated/, { timeout: 5_000 });
+    await expect(status).toContainText(/Curated/);
+    await expect(status).toContainText(/78% pass rate/);
+
+    // After the theater floor elapses, the page navigates to the curated
+    // scorecard URL.
+    await page.waitForURL('**/score/bat', { timeout: 10_000 });
+  });
+
+  test('phase progression updates status text while waiting on /api/score', async ({ page }) => {
+    // Mock /api/score with an artificial delay so the phase progression
+    // has time to tick at least once before the response arrives.
+    await page.route('https://challenges.cloudflare.com/turnstile/v0/api.js**', async (route) => {
+      await route.fulfill({
+        contentType: 'application/javascript',
+        body: `
+          window.turnstile = {
+            render(_el, opts) {
+              window.__lastTurnstileCallback = opts.callback;
+              return 'fake-widget-id';
+            },
+            execute() {
+              const cb = window.__lastTurnstileCallback;
+              if (cb) setTimeout(() => cb('fake-token'), 10);
+            },
+            reset() {}, remove() {},
+          };
+        `,
+      });
+    });
+    await page.route('**/api/score', async (route) => {
+      // Hold the response for 1.5 s so the phase ticker has time to fire
+      // the t=900 ms "Resolving install path…" tick.
+      await new Promise((r) => setTimeout(r, 1500));
+      await route.fulfill({
+        status: 200,
+        contentType: 'application/json; charset=utf-8',
+        body: JSON.stringify({
+          scorecard: SCORECARD_SAMPLE,
+          spec_version: '0.4.0',
+          anc_version: '0.3.1',
+          checker_url: 'https://anc.dev/score',
+          share_url: '/score/live/ripgrep',
+        }),
+      });
+    });
+
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('cargo install something-uncurated');
+    await page.locator('[data-live-score-submit]').click();
+
+    const status = page.locator('[data-live-score-status]');
+    // First tick: "Queued…" lands immediately on submit.
+    await expect(status).toContainText(/Queued/, { timeout: 1_000 });
+    // Second tick at t=900 ms: "Resolving install path…"
+    await expect(status).toContainText(/Resolving install path/, { timeout: 2_500 });
+  });
+
+  test('example chip click fills input and lazy-loads Turnstile', async ({ page }) => {
+    const observer = await mockTurnstileAndScore(page, {
+      status: 200,
+      body: { scorecard: SCORECARD_SAMPLE, anc_version: '0.3.1', spec_version: '0.4.0', share_url: '/score/live/bat' },
+    });
+
+    await page.goto('/');
+    // No interaction yet → Turnstile not requested.
+    expect(observer.turnstileRequested()).toBe(false);
+
+    await page.locator('[data-live-score-example="brew install bat"]').click();
+    await expect(page.locator('#live-score-input')).toHaveValue('brew install bat');
+
+    // Chip click is one of the lazy-load triggers; Turnstile request fires.
+    await page.waitForFunction(() => Boolean((window as { turnstile?: object }).turnstile), { timeout: 5_000 });
+    expect(observer.turnstileRequested()).toBe(true);
+  });
+});
+
+test.describe('homepage live-scoring form — lazy-load regression', () => {
+  test('scrolling past the form without interaction does NOT load Turnstile', async ({ page }) => {
+    let turnstileRequested = false;
+    await page.route('https://challenges.cloudflare.com/turnstile/v0/api.js**', async (route) => {
+      turnstileRequested = true;
+      await route.fulfill({ status: 204 });
+    });
+
+    await page.goto('/');
+    // Scroll the form into view and out again — no focus/click/paste.
+    await page.evaluate(() => {
+      document.querySelector('.live-score')?.scrollIntoView({ behavior: 'instant', block: 'center' });
+      window.scrollBy(0, 1000);
+    });
+    // Give the page a generous window — any deferred script that picks
+    // up the form should have fired by now if it was going to.
+    await page.waitForTimeout(1000);
+    expect(turnstileRequested).toBe(false);
+  });
+});
+
+test.describe('homepage live-scoring form — error + bounce branches', () => {
+  test('invalid input shows inline error', async ({ page }) => {
+    await mockTurnstileAndScore(page, {
+      status: 400,
+      body: {
+        error: { code: 'unrecognized_input', cta_text: 'paste a tool name…' },
+        spec_version: '0.4.0',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('garbage{{{');
+    await page.locator('[data-live-score-submit]').click();
+
+    const status = page.locator('[data-live-score-status]');
+    await expect(status).toBeVisible({ timeout: 5_000 });
+    await expect(status).toHaveClass(/live-score__status--error/);
+    await expect(status).toContainText(/not a recognized/i);
+  });
+
+  test('non-GitHub URL → inline error', async ({ page }) => {
+    await mockTurnstileAndScore(page, {
+      status: 400,
+      body: {
+        error: { code: 'non_github_host', cta_text: 'anc.dev only scores public GitHub repos.' },
+        spec_version: '0.4.0',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('https://gitlab.com/some/repo');
+    await page.locator('[data-live-score-submit]').click();
+
+    const status = page.locator('[data-live-score-status]');
+    await expect(status).toContainText(/public GitHub/i, { timeout: 5_000 });
+  });
+
+  test('429 rate limit shows countdown copy', async ({ page }) => {
+    await mockTurnstileAndScore(page, {
+      status: 429,
+      body: {
+        error: { code: 'rate_limited', retry_after: 60, cta_text: '...' },
+        spec_version: '0.4.0',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('ripgrep');
+    await page.locator('[data-live-score-submit]').click();
+
+    const status = page.locator('[data-live-score-status]');
+    await expect(status).toContainText(/60s/i, { timeout: 5_000 });
+  });
+
+  test('Turnstile siteverify fail shows generic verification error', async ({ page }) => {
+    await mockTurnstileAndScore(page, {
+      status: 400,
+      body: {
+        error: { code: 'turnstile_failed', cta_text: '...' },
+        spec_version: '0.4.0',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('ripgrep');
+    await page.locator('[data-live-score-submit]').click();
+
+    const status = page.locator('[data-live-score-status]');
+    await expect(status).toContainText(/verification/i, { timeout: 5_000 });
+  });
+
+  test('bounce: chain_no_resolve renders the right headline + CTA', async ({ page }) => {
+    await mockTurnstileAndScore(page, {
+      status: 404,
+      body: {
+        error: { code: 'chain_no_resolve', cta_text: '...' },
+        spec_version: '0.4.0',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('unknown-tool');
+    await page.locator('[data-live-score-submit]').click();
+
+    const status = page.locator('[data-live-score-status]');
+    await expect(status).toBeVisible({ timeout: 5_000 });
+    await expect(status).toHaveClass(/live-score__status--bounce/);
+    await expect(status.locator('.live-score__bounce-headline')).toContainText(/pre-built binary/);
+    await expect(status.locator('a[href="/install"]')).toBeVisible();
+  });
+
+  test('bounce: chain_resolved_install_failed renders headline + truncated stderr', async ({ page }) => {
+    const longStderr = 'error: '.repeat(80); // > 300 chars → truncates
+    await mockTurnstileAndScore(page, {
+      status: 502,
+      body: {
+        error: { code: 'chain_resolved_install_failed', details: longStderr, cta_text: '...' },
+        spec_version: '0.4.0',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('cargo install bogus');
+    await page.locator('[data-live-score-submit]').click();
+
+    const status = page.locator('[data-live-score-status]');
+    await expect(status.locator('.live-score__bounce-headline')).toContainText(/install path/);
+    const stderrBlock = status.locator('.live-score__bounce-stderr');
+    await expect(stderrBlock).toBeVisible();
+    await expect(stderrBlock).toContainText(/truncated/);
+  });
+
+  test('bounce: chain_resolved_no_binary_produced shows library-not-CLI headline', async ({ page }) => {
+    await mockTurnstileAndScore(page, {
+      status: 502,
+      body: {
+        error: { code: 'chain_resolved_no_binary_produced', details: '', cta_text: '...' },
+        spec_version: '0.4.0',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('npm i -g react');
+    await page.locator('[data-live-score-submit]').click();
+
+    const status = page.locator('[data-live-score-status]');
+    await expect(status.locator('.live-score__bounce-headline')).toContainText(/library/i);
+  });
+
+  test('non_https_url shows a distinct https-required message (NOT the generic copy)', async ({ page }) => {
+    // The client copy is mapped per error code. The illustrative input
+    // here is a non-upgradeable protocol (`javascript:`) — http:// is
+    // silently upgraded to https:// by validateInput, so it no longer
+    // surfaces the non_https_url copy. The mock pins the differentiated
+    // message regardless of what the user types.
+    await mockTurnstileAndScore(page, {
+      status: 400,
+      body: {
+        error: { code: 'non_https_url', cta_text: 'Use https:// — http:// is not allowed.' },
+        spec_version: '0.4.0',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('javascript://github.com/x/y');
+    await page.locator('[data-live-score-submit]').click();
+
+    const status = page.locator('[data-live-score-status]');
+    await expect(status).toBeVisible({ timeout: 5_000 });
+    await expect(status).toContainText(/https:\/\//);
+    await expect(status).toContainText(/http:\/\//);
+    // Must NOT show the generic catch-all copy.
+    await expect(status).not.toContainText(/not a recognized/i);
+  });
+
+  test('invalid_url_path shows a distinct "paste the repo root" message', async ({ page }) => {
+    // `/tree/<branch>` URLs are ACCEPTED (route through the git-clone
+    // path), so the invalid_url_path bounce only fires for genuinely-
+    // malformed URL paths (release-download links, empty branch, branch-
+    // name regex misses). The mock here pins the copy when the server
+    // returns the code; the fill input is a release-asset URL which the
+    // validator still rejects.
+    await mockTurnstileAndScore(page, {
+      status: 400,
+      body: {
+        error: {
+          code: 'invalid_url_path',
+          cta_text: 'Paste the repo root URL (e.g. https://github.com/owner/repo), not a branch or release link.',
+        },
+        spec_version: '0.4.0',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('https://github.com/cli/cli/releases/download/v1/cli.tar.gz');
+    await page.locator('[data-live-score-submit]').click();
+
+    const status = page.locator('[data-live-score-status]');
+    await expect(status).toBeVisible({ timeout: 5_000 });
+    await expect(status).toContainText(/repo root/i);
+    await expect(status).toContainText(/branch or release link/i);
+    await expect(status).not.toContainText(/not a recognized/i);
+  });
+
+  test('unparseable_install_command surfaces the supported-PM hint copy', async ({ page }) => {
+    // Server now routes apt-get / dnf / yum / etc. install commands to
+    // unparseable_install_command (was unrecognized_input). The client
+    // copy lists the supported PMs so the user has a concrete next
+    // step instead of staring at a generic "not recognized" line.
+    await mockTurnstileAndScore(page, {
+      status: 400,
+      body: {
+        error: {
+          code: 'unparseable_install_command',
+          details: 'apt-get install foo',
+          cta_text: '...',
+        },
+        spec_version: '0.4.0',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('apt-get install foo');
+    await page.locator('[data-live-score-submit]').click();
+
+    const status = page.locator('[data-live-score-status]');
+    await expect(status).toBeVisible({ timeout: 5_000 });
+    await expect(status).toContainText(/install command/i);
+    await expect(status).toContainText(/package manager isn't supported/i);
+    // The supported set must be enumerated so the user can pivot
+    // without checking the docs.
+    await expect(status).toContainText(/cargo/);
+    await expect(status).toContainText(/brew/);
+    await expect(status).toContainText(/npm/);
+    await expect(status).toContainText(/pip/);
+  });
+
+  test('bounce: install_unsupported pm=brew_only does NOT mention "desktop"', async ({ page }) => {
+    // Pre-fix the bounce said "Homebrew needs a desktop runtime the
+    // sandbox doesn't provide" — homebrew doesn't need a desktop. The
+    // copy now reads "Homebrew isn't available in the scoring sandbox",
+    // which is honest about what the sandbox is missing without
+    // inventing a phantom runtime requirement.
+    await mockTurnstileAndScore(page, {
+      status: 502,
+      body: {
+        error: { code: 'install_unsupported', pm: 'brew_only', cta_text: '...' },
+        spec_version: '0.4.0',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('brew install some-brew-only-tool');
+    await page.locator('[data-live-score-submit]').click();
+
+    const status = page.locator('[data-live-score-status]');
+    await expect(status).toBeVisible({ timeout: 5_000 });
+    await expect(status).toHaveClass(/live-score__status--bounce/);
+    // Headline still pins the topic.
+    await expect(status.locator('.live-score__bounce-headline')).toContainText(/Homebrew/);
+    // New body copy.
+    await expect(status.locator('.live-score__bounce-body')).toContainText(
+      /Homebrew isn't available in the scoring sandbox/i,
+    );
+    // No phantom "desktop" or "desktop runtime" claim.
+    const bodyText = await status.locator('.live-score__bounce-body').textContent();
+    expect(bodyText ?? '').not.toMatch(/desktop/i);
+    // The cargo / pipx / npm fallback hint must still be present.
+    await expect(status.locator('.live-score__bounce-body')).toContainText(/cargo install/);
+    await expect(status.locator('.live-score__bounce-body')).toContainText(/pipx install/);
+    await expect(status.locator('.live-score__bounce-body')).toContainText(/npm i -g/);
+  });
+});
+
+test.describe('homepage live-scoring form — CSP + markdown-twin regressions', () => {
+  test('CSP header includes challenges.cloudflare.com in script-src + frame-src + connect-src', async ({ request }) => {
+    const res = await request.get('/');
+    expect(res.status()).toBe(200);
+    const csp = res.headers()['content-security-policy'];
+    expect(csp).toBeTruthy();
+    // Build a fragmented matcher so directive ordering doesn't matter.
+    expect(csp).toMatch(/script-src[^;]*challenges\.cloudflare\.com/);
+    expect(csp).toMatch(/frame-src[^;]*challenges\.cloudflare\.com/);
+    expect(csp).toMatch(/connect-src[^;]*challenges\.cloudflare\.com/);
+  });
+
+  test('/index.md does NOT mention live-score, turnstile, or /api/score', async ({ request }) => {
+    const res = await request.get('/index.md');
+    expect(res.status()).toBe(200);
+    const md = (await res.text()).toLowerCase();
+    expect(md).not.toContain('live-score');
+    expect(md).not.toContain('turnstile');
+    expect(md).not.toContain('challenges.cloudflare.com');
+    expect(md).not.toContain('/api/score');
+  });
+
+  test('Accept: text/markdown on / serves the silent twin (no live-scoring leaks)', async ({ request }) => {
+    const res = await request.get('/', { headers: { accept: 'text/markdown' } });
+    expect(res.headers()['content-type']).toContain('text/markdown');
+    const md = (await res.text()).toLowerCase();
+    expect(md).not.toContain('live-score');
+    expect(md).not.toContain('turnstile');
+  });
+});
+
+test.describe('/live-score URL canonicalization', () => {
+  test('/score/live/<binary>.html → 301 to /score/live/<binary>', async ({ request }) => {
+    const res = await request.get('/score/live/ripgrep.html', { maxRedirects: 0 });
+    expect(res.status()).toBe(301);
+    expect(res.headers().location).toBe('/score/live/ripgrep');
+  });
+
+  test('/score/live/<binary> (no extension) returns HTML 404 when uncached', async ({ request }) => {
+    const res = await request.get('/score/live/unknown-binary-xyz');
+    expect(res.status()).toBe(404);
+    expect(res.headers()['content-type']).toContain('text/html');
+  });
+
+  test('/score/live/<binary>.md returns markdown twin (404 when uncached)', async ({ request }) => {
+    const res = await request.get('/score/live/unknown-binary-xyz.md');
+    expect(res.status()).toBe(404);
+    expect(res.headers()['content-type']).toContain('text/markdown');
+  });
+});
+
+test.describe('homepage live-scoring — red-team', () => {
+  test('successful submit does NOT leave the Turnstile token in the URL', async ({ page }) => {
+    await mockTurnstileAndScore(page, {
+      status: 200,
+      body: {
+        scorecard: SCORECARD_SAMPLE,
+        spec_version: '0.4.0',
+        anc_version: '0.3.1',
+        share_url: '/score/live/ripgrep',
+        checker_url: 'https://anc.dev/score',
+      },
+    });
+
+    await page.goto('/');
+    await page.locator('#live-score-input').fill('ripgrep');
+    await page.locator('[data-live-score-submit]').click();
+    await page.waitForURL('**/score/live/ripgrep', { timeout: 10_000 });
+
+    const finalUrl = page.url();
+    expect(finalUrl).not.toContain('fake-token');
+    expect(finalUrl).not.toContain('turnstile_token');
+  });
+
+  test('CSP blocks an injected inline script tag from executing', async ({ page }) => {
+    await page.goto('/');
+    // Inject a fresh inline script via document.write of a new <script>
+    // tag. The CSP rules permit `'unsafe-inline'` (load-bearing for
+    // theme-init), so this test is a sanity check that the OVERALL CSP
+    // doesn't accidentally permit cross-origin scripts. Specifically:
+    // an `https://evil.example.com/x.js` external script should be
+    // blocked by `script-src 'self' 'unsafe-inline' challenges.cloudflare.com`.
+    const violations: string[] = [];
+    page.on('console', (msg) => {
+      if (msg.type() === 'error' && /Content Security Policy/i.test(msg.text())) {
+        violations.push(msg.text());
+      }
+    });
+
+    await page.evaluate(() => {
+      const s = document.createElement('script');
+      s.src = 'https://evil.example.com/x.js';
+      document.head.appendChild(s);
+    });
+    // Give the browser a moment to fire the CSP report.
+    await page.waitForTimeout(500);
+    expect(violations.some((v) => /evil\.example\.com/.test(v))).toBe(true);
+  });
+});
diff --git a/tests/e2e/og.e2e.ts b/tests/e2e/og.e2e.ts
index 3066763..70e6d53 100644
--- a/tests/e2e/og.e2e.ts
+++ b/tests/e2e/og.e2e.ts
@@ -28,14 +28,27 @@ test('index has OG + Twitter card meta with 1200×630 image', async ({ page }) =
   expect(await meta('twitter:image')).toContain('/og-image.png');
 });
 
-test('JSON-LD TechArticle present and parses', async ({ page }) => {
+test('JSON-LD @graph carries Organization and TechArticle with author', async ({ page }) => {
   await page.goto('/');
   const raw = await page.locator('script[type="application/ld+json"]').first().textContent();
   expect(raw).toBeTruthy();
   const data = JSON.parse(raw ?? '{}');
-  expect(data['@type']).toBe('TechArticle');
-  expect(data.headline).toBeTruthy();
-  expect(data.url).toContain('anc.dev');
+  expect(data['@context']).toBe('https://schema.org');
+  expect(Array.isArray(data['@graph'])).toBe(true);
+
+  const org = data['@graph'].find((n: { '@type': string }) => n['@type'] === 'Organization');
+  expect(org).toBeTruthy();
+  expect(org.name).toBe('anc.dev');
+  expect(org['@id']).toContain('#organization');
+  expect(Array.isArray(org.sameAs)).toBe(true);
+
+  const article = data['@graph'].find((n: { '@type': string }) => n['@type'] === 'TechArticle');
+  expect(article).toBeTruthy();
+  expect(article.headline).toBeTruthy();
+  expect(article.url).toContain('anc.dev');
+  expect(article.author?.['@type']).toBe('Person');
+  expect(article.author?.name).toBe('Brett Davies');
+  expect(article.publisher?.['@id']).toBe(org['@id']);
 });
 
 test('principle pages inherit the same OG shape', async ({ page }) => {
diff --git a/tests/regression.test.ts b/tests/regression.test.ts
index 0c097c0..3524c5f 100644
--- a/tests/regression.test.ts
+++ b/tests/regression.test.ts
@@ -258,7 +258,7 @@ describe('regression #6 — /install (CLI install page) — HTML+MD only, no JSO
     const html = await readFile(join(DIST, 'install.html'), 'utf8');
     const md = await readFile(join(DIST, 'install.md'), 'utf8');
     expect(html).toContain('<h1');
-    expect(md).toMatch(/^#\s+Install agentnative/);
+    expect(md).toMatch(/^#\s+Install anc/);
     await expect(readFile(join(DIST, 'install.json'), 'utf8')).rejects.toThrow(/ENOENT/);
   });
 
@@ -388,3 +388,56 @@ describe('regression #7 — live-scoring build indexes (plan U1)', () => {
     expect(overlap).toEqual([]);
   });
 });
+
+describe('regression #8 — /api/score response triad + spec-version exports + name coverage', () => {
+  test('shapeScoreSuccess emits the four-field response triad', async () => {
+    // Cheap smoke for the contract enforced by response-shape.ts. The
+    // full handler-dispatch variant lives in tests/score-contract.test.ts;
+    // this one fails fast in the regression suite without spinning up the
+    // pipeline stubs.
+    const { shapeScoreSuccess } = await import('../src/worker/score/response-shape');
+    const res = shapeScoreSuccess({ kind: 'registry_hit' }, '0.3.0', 'live');
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as Record<string, unknown>;
+    expect(body).toHaveProperty('scorecard');
+    expect(typeof body.spec_version).toBe('string');
+    expect(typeof body.site_spec_version).toBe('string');
+    expect(typeof body.anc_version).toBe('string');
+    expect(typeof body.checker_url).toBe('string');
+  });
+
+  test('shapeScoreSuccess refuses to emit a partial response when anc_version is missing', async () => {
+    // Mirror image of the triad contract: a missing anc_version forces
+    // a 500 incomplete_response_contract, not a quiet omission.
+    const { shapeScoreSuccess } = await import('../src/worker/score/response-shape');
+    const res = shapeScoreSuccess({ kind: 'registry_hit' }, null, 'live');
+    expect(res.status).toBe(500);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('incomplete_response_contract');
+  });
+
+  test('every registry.yaml tools[].name appears in by_slug', async () => {
+    // Cheap overlap smoke; the full join (anc_version, scorecard_url,
+    // version, score_pct) lives in tests/score-contract.test.ts.
+    const yaml = await import('js-yaml');
+    const raw = await readFile(join(REPO_ROOT, 'registry.yaml'), 'utf8');
+    const doc = yaml.load(raw) as { tools: Array<{ name: string }> };
+    const idx = JSON.parse(await readFile(join(DIST, 'registry-index.json'), 'utf8'));
+    const missing = doc.tools.map((t) => t.name).filter((name) => !(name in idx.by_slug));
+    expect(missing).toEqual([]);
+  });
+
+  test('src/worker/spec-version.gen.ts exports SPEC_VERSION + SITE_SPEC_VERSION + CHECKER_URL as non-empty strings', async () => {
+    // Existence guard on the build-emitted constants the response-shape
+    // depends on. tests/spec-version-gen.test.ts already covers freshness
+    // against the source VERSION files; this assertion only catches the
+    // export-shape regression (renamed or removed symbol).
+    const mod = await import('../src/worker/spec-version.gen');
+    expect(typeof mod.SPEC_VERSION).toBe('string');
+    expect(mod.SPEC_VERSION.length).toBeGreaterThan(0);
+    expect(typeof mod.SITE_SPEC_VERSION).toBe('string');
+    expect(mod.SITE_SPEC_VERSION.length).toBeGreaterThan(0);
+    expect(typeof mod.CHECKER_URL).toBe('string');
+    expect(mod.CHECKER_URL.length).toBeGreaterThan(0);
+  });
+});
diff --git a/tests/score-cache.test.ts b/tests/score-cache.test.ts
new file mode 100644
index 0000000..6feed82
--- /dev/null
+++ b/tests/score-cache.test.ts
@@ -0,0 +1,191 @@
+// R2 cache wrapper unit tests.
+//
+// Plan U7 (docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md
+// "Test scenarios" under U7). Exercises cache.get / cache.put / cache.keyFor
+// against an in-memory R2 stub. Real R2 round-trips are covered by staging
+// verification, not bun-test (workerd-only behavior).
+
+import { describe, expect, test } from 'bun:test';
+import { type CachedScorecard, type CacheEnv, get, keyFor, put } from '../src/worker/score/cache';
+
+// ---------------------------------------------------------------------------
+// In-memory R2 stub
+// ---------------------------------------------------------------------------
+
+type StubOpts = {
+  throwOnGet?: boolean;
+  throwOnPut?: boolean;
+  throwOnDelete?: boolean;
+  // Override get() to return a raw value (used to inject corrupted
+  // payloads that wouldn't go through put()'s validation).
+  prefill?: Record<string, unknown>;
+};
+
+function makeR2Stub(opts: StubOpts = {}): { env: CacheEnv; store: Map<string, string>; deletedKeys: string[] } {
+  const store = new Map<string, string>();
+  const deletedKeys: string[] = [];
+  if (opts.prefill) {
+    for (const [k, v] of Object.entries(opts.prefill)) {
+      store.set(k, typeof v === 'string' ? v : JSON.stringify(v));
+    }
+  }
+  const env: CacheEnv = {
+    SCORE_CACHE: {
+      async get(key: string) {
+        if (opts.throwOnGet) throw new Error('r2_get_failed');
+        const raw = store.get(key);
+        if (raw === undefined) return null;
+        // R2's `get(key)` returns an R2ObjectBody. The minimum surface
+        // our cache helper uses is `.json()` — that's what we mock.
+        return {
+          async json() {
+            return JSON.parse(raw);
+          },
+          async text() {
+            return raw;
+          },
+        };
+      },
+      async put(key: string, value: unknown) {
+        if (opts.throwOnPut) throw new Error('r2_put_failed');
+        store.set(key, typeof value === 'string' ? value : String(value));
+      },
+      async delete(key: string) {
+        if (opts.throwOnDelete) throw new Error('r2_delete_failed');
+        deletedKeys.push(key);
+        store.delete(key);
+      },
+    } as unknown as R2Bucket,
+  };
+  return { env, store, deletedKeys };
+}
+
+// ---------------------------------------------------------------------------
+// keyFor
+// ---------------------------------------------------------------------------
+
+describe('cache.keyFor', () => {
+  test('returns the canonical scores/{binary}/{ancVersion}.json shape', () => {
+    expect(keyFor('rg', '0.4.0')).toBe('scores/rg/0.4.0.json');
+    expect(keyFor('cowsay', '0.4.0')).toBe('scores/cowsay/0.4.0.json');
+  });
+
+  test('passes through hyphens and dots in binary names', () => {
+    expect(keyFor('chrome-launcher', '0.4.0')).toBe('scores/chrome-launcher/0.4.0.json');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// get
+// ---------------------------------------------------------------------------
+
+describe('cache.get', () => {
+  test('miss returns null', async () => {
+    const { env } = makeR2Stub();
+    expect(await get(env, keyFor('rg', '0.4.0'))).toBeNull();
+  });
+
+  test('hit returns the cached payload (shape validated)', async () => {
+    const payload: CachedScorecard = {
+      spec_version: '0.4.0',
+      anc_version: '0.3.1',
+      tool_version: '15.1.0',
+      scorecard: { tool: { name: 'ripgrep' }, score: { value: 88 } },
+    };
+    const { env } = makeR2Stub({ prefill: { 'scores/rg/0.4.0.json': payload } });
+    const result = await get(env, 'scores/rg/0.4.0.json');
+    expect(result).toEqual(payload);
+  });
+
+  test('corrupted payload (missing anc_version) → miss + best-effort delete', async () => {
+    const corrupted = { spec_version: '0.4.0', tool_version: '15.1.0', scorecard: {} };
+    const { env, deletedKeys } = makeR2Stub({ prefill: { 'scores/rg/0.4.0.json': corrupted } });
+    expect(await get(env, 'scores/rg/0.4.0.json')).toBeNull();
+    // Drain microtasks so the .catch() chain on delete settles.
+    await new Promise((r) => setTimeout(r, 0));
+    expect(deletedKeys).toContain('scores/rg/0.4.0.json');
+  });
+
+  test('corrupted payload (missing tool_version) → miss', async () => {
+    const corrupted = { spec_version: '0.4.0', anc_version: '0.3.1', scorecard: {} };
+    const { env } = makeR2Stub({ prefill: { 'scores/rg/0.4.0.json': corrupted } });
+    expect(await get(env, 'scores/rg/0.4.0.json')).toBeNull();
+  });
+
+  test('corrupted payload (missing scorecard field) → miss', async () => {
+    const corrupted = { spec_version: '0.4.0', anc_version: '0.3.1', tool_version: '15.1.0' };
+    const { env } = makeR2Stub({ prefill: { 'scores/rg/0.4.0.json': corrupted } });
+    expect(await get(env, 'scores/rg/0.4.0.json')).toBeNull();
+  });
+
+  test('empty-string fields treated as corrupted', async () => {
+    const corrupted = { spec_version: '0.4.0', anc_version: '', tool_version: '15.1.0', scorecard: {} };
+    const { env } = makeR2Stub({ prefill: { 'scores/rg/0.4.0.json': corrupted } });
+    expect(await get(env, 'scores/rg/0.4.0.json')).toBeNull();
+  });
+
+  test('R2 throws on read → treated as miss (best-effort)', async () => {
+    const { env } = makeR2Stub({ throwOnGet: true });
+    expect(await get(env, 'scores/rg/0.4.0.json')).toBeNull();
+  });
+
+  test('delete failure on corrupted payload does not throw', async () => {
+    const corrupted = { spec_version: '0.4.0', anc_version: '0.3.1', tool_version: '', scorecard: {} };
+    const { env } = makeR2Stub({ prefill: { 'scores/rg/0.4.0.json': corrupted }, throwOnDelete: true });
+    // Still returns null without surfacing the delete error.
+    expect(await get(env, 'scores/rg/0.4.0.json')).toBeNull();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// put
+// ---------------------------------------------------------------------------
+
+describe('cache.put', () => {
+  test('happy path writes a well-formed payload', async () => {
+    const { env, store } = makeR2Stub();
+    await put(env, keyFor('rg', '0.4.0'), { tool: { name: 'ripgrep' } }, '0.3.1', '15.1.0', '0.4.0');
+    const raw = store.get('scores/rg/0.4.0.json');
+    expect(raw).toBeTruthy();
+    if (!raw) return;
+    const parsed = JSON.parse(raw) as CachedScorecard;
+    expect(parsed.spec_version).toBe('0.4.0');
+    expect(parsed.anc_version).toBe('0.3.1');
+    expect(parsed.tool_version).toBe('15.1.0');
+    expect(parsed.scorecard).toEqual({ tool: { name: 'ripgrep' } });
+  });
+
+  test('refusal-to-cache-half-state: missing ancVersion throws', async () => {
+    const { env } = makeR2Stub();
+    await expect(put(env, 'scores/rg/0.4.0.json', {}, '', '15.1.0', '0.4.0')).rejects.toThrow(/ancVersion/);
+  });
+
+  test('refusal-to-cache-half-state: missing toolVersion throws', async () => {
+    const { env } = makeR2Stub();
+    await expect(put(env, 'scores/rg/0.4.0.json', {}, '0.3.1', '', '0.4.0')).rejects.toThrow(/toolVersion/);
+  });
+
+  test('refusal-to-cache-half-state: missing specVersion throws', async () => {
+    const { env } = makeR2Stub();
+    await expect(put(env, 'scores/rg/0.4.0.json', {}, '0.3.1', '15.1.0', '')).rejects.toThrow(/specVersion/);
+  });
+
+  test('R2 write failure is best-effort: logs but does not throw', async () => {
+    const { env } = makeR2Stub({ throwOnPut: true });
+    // Should not throw — the user's response must not depend on the cache.
+    await put(env, keyFor('rg', '0.4.0'), {}, '0.3.1', '15.1.0', '0.4.0');
+  });
+
+  test('round-trip: put then get returns the same payload', async () => {
+    const { env } = makeR2Stub();
+    const scorecard = { tool: { name: 'ripgrep', version: '15.1.0' }, score: { value: 88 } };
+    await put(env, keyFor('rg', '0.4.0'), scorecard, '0.3.1', '15.1.0', '0.4.0');
+    const result = await get(env, keyFor('rg', '0.4.0'));
+    expect(result).toEqual({
+      spec_version: '0.4.0',
+      anc_version: '0.3.1',
+      tool_version: '15.1.0',
+      scorecard,
+    });
+  });
+});
diff --git a/tests/score-contract.test.ts b/tests/score-contract.test.ts
new file mode 100644
index 0000000..8cc4aa9
--- /dev/null
+++ b/tests/score-contract.test.ts
@@ -0,0 +1,285 @@
+// Cross-validates the three artifacts that together form the /api/score
+// registry-fast-path contract:
+//
+//   1. registry.yaml              — editorial source of truth for tools
+//   2. dist/registry-index.json   — build-emitted index the Worker reads
+//   3. scorecards/<name>-v<v>.json — committed score outputs that enrich
+//                                    the index with version, anc_version,
+//                                    scorecard_url, and score_pct
+//
+// A break in any of the joins between these three artifacts lands a wrong
+// response on /api/score for a curated tool. This file fails CI before
+// that drift ships.
+//
+// Run `bun run build` before these tests — the contract depends on
+// dist/registry-index.json being current.
+
+import { beforeEach, describe, expect, test } from 'bun:test';
+import { readdir, readFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import yaml from 'js-yaml';
+import { buildRegistryIndex } from '../src/build/registry-index.mjs';
+import { _resetIndexCache, handleScore, type ScoreEnv } from '../src/worker/score/handler';
+import { _resetKillSwitchCache } from '../src/worker/score/kill-switch';
+
+const REPO_ROOT = join(import.meta.dir, '..');
+const DIST = join(REPO_ROOT, 'dist');
+const SCORECARDS_DIR = join(REPO_ROOT, 'scorecards');
+
+type RegistryTool = {
+  name: string;
+  binary: string;
+  install: string;
+  repo?: string;
+  url?: string;
+  audit_profile?: string;
+};
+
+type RegistryIndexEntry = {
+  name: string;
+  binary: string;
+  install: string;
+  repo?: string;
+  audit_profile?: string;
+  version?: string;
+  anc_version?: string;
+  scorecard_url?: string;
+  score_pct?: number;
+};
+
+type RegistryIndex = {
+  by_slug: Record<string, RegistryIndexEntry>;
+  by_owner_repo: Record<string, RegistryIndexEntry>;
+};
+
+type Scorecard = {
+  schema_version: string;
+  tool: { name: string; binary: string; version: string };
+  anc: { version: string };
+  badge?: { score_pct?: number; eligible?: boolean };
+};
+
+// Matches build/scorecards.mjs:indexScorecardsByName(), which is the
+// authoritative parser for the on-disk filename pattern.
+const SCORECARD_FILENAME_RE = /^(?<name>[a-z0-9-]+)-v(?<version>.+)\.json$/;
+
+async function loadRegistry(): Promise<RegistryTool[]> {
+  const raw = await readFile(join(REPO_ROOT, 'registry.yaml'), 'utf8');
+  const doc = yaml.load(raw) as { tools: RegistryTool[] };
+  return doc.tools;
+}
+
+async function loadIndex(): Promise<RegistryIndex> {
+  const raw = await readFile(join(DIST, 'registry-index.json'), 'utf8');
+  return JSON.parse(raw) as RegistryIndex;
+}
+
+async function loadScorecards(): Promise<Array<{ filename: string; name: string; version: string; data: Scorecard }>> {
+  const files = (await readdir(SCORECARDS_DIR)).filter((f) => f.endsWith('.json'));
+  const out: Array<{ filename: string; name: string; version: string; data: Scorecard }> = [];
+  for (const filename of files) {
+    const m = filename.match(SCORECARD_FILENAME_RE);
+    if (!m?.groups) continue;
+    const name = m.groups.name as string;
+    const version = m.groups.version as string;
+    const data = JSON.parse(await readFile(join(SCORECARDS_DIR, filename), 'utf8')) as Scorecard;
+    out.push({ filename, name, version, data });
+  }
+  return out;
+}
+
+describe('score-contract — registry.yaml <-> dist/registry-index.json <-> scorecards/', () => {
+  test('every registry.yaml tool name appears in by_slug', async () => {
+    const registry = await loadRegistry();
+    const index = await loadIndex();
+    const missing = registry.map((t) => t.name).filter((name) => !(name in index.by_slug));
+    expect(missing).toEqual([]);
+  });
+
+  test('every committed scorecard has a matching registry.yaml entry', async () => {
+    const registry = await loadRegistry();
+    const cards = await loadScorecards();
+    const registryNames = new Set(registry.map((t) => t.name));
+    const orphans = cards.filter((c) => !registryNames.has(c.name)).map((c) => c.filename);
+    expect(orphans).toEqual([]);
+  });
+
+  test('every scorecard joins to by_slug with matching version, anc_version, and scorecard_url', async () => {
+    const cards = await loadScorecards();
+    const index = await loadIndex();
+    const drifts: string[] = [];
+    for (const card of cards) {
+      const entry = index.by_slug[card.name];
+      if (!entry) {
+        drifts.push(`${card.filename}: by_slug has no entry for "${card.name}"`);
+        continue;
+      }
+      // Filename version is the source of truth for the index's `version`
+      // field. build.mjs derives it from indexScorecardsByName, not from
+      // the raw --version output inside the scorecard.
+      if (entry.version !== card.version) {
+        drifts.push(`${card.filename}: by_slug.version "${entry.version}" != filename version "${card.version}"`);
+      }
+      if (entry.anc_version !== card.data.anc.version) {
+        drifts.push(
+          `${card.filename}: by_slug.anc_version "${entry.anc_version}" != scorecard anc.version "${card.data.anc.version}"`,
+        );
+      }
+      const expectedUrl = `/score/${card.name}`;
+      if (entry.scorecard_url !== expectedUrl) {
+        drifts.push(`${card.filename}: by_slug.scorecard_url "${entry.scorecard_url}" != "${expectedUrl}"`);
+      }
+    }
+    expect(drifts).toEqual([]);
+  });
+
+  test('score_pct on by_slug equals badge.score_pct on the scorecard', async () => {
+    const cards = await loadScorecards();
+    const index = await loadIndex();
+    const drifts: string[] = [];
+    for (const card of cards) {
+      const entry = index.by_slug[card.name];
+      const expected = card.data.badge?.score_pct ?? null;
+      // The enrichment only writes score_pct when it's a number. Null on
+      // either side is OK; numeric mismatch is the contract violation.
+      if (typeof entry?.score_pct === 'number' && entry.score_pct !== expected) {
+        drifts.push(`${card.filename}: by_slug.score_pct ${entry.score_pct} != badge.score_pct ${expected}`);
+      }
+    }
+    expect(drifts).toEqual([]);
+  });
+});
+
+// Stubbed-Worker call against the REAL committed dist/registry-index.json.
+// Pattern follows tests/score-handler-share-url.test.ts:makeEnv() — only
+// the ASSETS fetcher is rewired to return the on-disk index so the join
+// the contract describes is the join production actually serves.
+
+function postScore(input: string): Request {
+  return new Request('https://anc.dev/api/score', {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ input, turnstile_token: 'tok' }),
+  });
+}
+
+function makeEnvFromIndex(index: RegistryIndex): ScoreEnv {
+  const hintsIndex = { by_owner_repo: {} };
+  const cacheStore = new Map<string, string>();
+  const cacheStub = {
+    async get(key: string) {
+      const raw = cacheStore.get(key);
+      if (raw === undefined) return null;
+      return {
+        async json() {
+          return JSON.parse(raw);
+        },
+        async text() {
+          return raw;
+        },
+      };
+    },
+    async put(key: string, value: unknown) {
+      cacheStore.set(key, typeof value === 'string' ? value : String(value));
+    },
+    async delete(key: string) {
+      cacheStore.delete(key);
+    },
+  };
+
+  return {
+    ASSETS: {
+      async fetch(req: Request | string): Promise<Response> {
+        const url = typeof req === 'string' ? req : req.url;
+        const path = new URL(url).pathname;
+        if (path === '/registry-index.json') {
+          return new Response(JSON.stringify(index), { status: 200 });
+        }
+        if (path === '/discovery-hints-index.json') {
+          return new Response(JSON.stringify(hintsIndex), { status: 200 });
+        }
+        return new Response('not found', { status: 404 });
+      },
+    } as Fetcher,
+    SCORE: {} as DurableObjectNamespace,
+    SCORE_KV: {
+      async get() {
+        return null;
+      },
+    } as unknown as KVNamespace,
+    SCORE_CACHE: cacheStub as unknown as R2Bucket,
+    SCORE_LIMITER: {
+      async limit() {
+        return { success: true };
+      },
+    },
+    SCORE_LIMITER_IP: {
+      async limit() {
+        return { success: true };
+      },
+    },
+    SCORE_TELEMETRY: { writeDataPoint() {} },
+    TURNSTILE_SECRET: 'test',
+    SESSION_HMAC_SECRET: 'test-hmac-secret-long-enough',
+  } as ScoreEnv;
+}
+
+describe('score-contract — /api/score registry-fast-path response shape', () => {
+  beforeEach(() => {
+    _resetIndexCache();
+    _resetKillSwitchCache();
+  });
+
+  test('curated slug returns registry_hit with the full response triad', async () => {
+    const index = await loadIndex();
+    const env = makeEnvFromIndex(index);
+    const res = await handleScore(postScore('ripgrep'), env);
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as {
+      scorecard: { kind?: string; scorecard_url?: string; score_pct?: number | null };
+      spec_version?: string;
+      site_spec_version?: string;
+      anc_version?: string;
+      checker_url?: string;
+    };
+    expect(body.scorecard.kind).toBe('registry_hit');
+    expect(body.scorecard.scorecard_url).toBe('/score/ripgrep');
+    expect(typeof body.spec_version).toBe('string');
+    expect(typeof body.site_spec_version).toBe('string');
+    expect(typeof body.anc_version).toBe('string');
+    expect(typeof body.checker_url).toBe('string');
+  });
+});
+
+describe('score-contract — negative drift catcher', () => {
+  test('a scorecard whose name is missing from by_slug surfaces as a contract violation', () => {
+    // Demonstrates the failure shape the cross-validation above catches:
+    // a renamed or moved scorecard whose name no longer resolves in the
+    // built index. Without a regenerated dist/registry-index.json, by_slug
+    // forgets the tool and the Worker would 404 the curated path.
+    const fakeCard = { name: 'ghost-tool', filename: 'ghost-tool-v1.0.0.json' };
+    const fakeIndex: RegistryIndex = {
+      by_slug: {
+        curl: { name: 'curl', binary: 'curl', install: 'brew install curl' },
+      },
+      by_owner_repo: {},
+    };
+    expect(fakeCard.name in fakeIndex.by_slug).toBe(false);
+  });
+
+  test('buildRegistryIndex omits enrichment fields when no scorecard is present', () => {
+    // Pins the build emitter contract: tools without a paired scorecard
+    // appear in by_slug with the editorial fields (name, binary, install)
+    // but without version/anc_version/scorecard_url. If a future emitter
+    // change starts synthesizing defaults, the cross-validation above
+    // needs to be revised so it doesn't accept fabricated values.
+    const { index } = buildRegistryIndex(
+      [{ name: 'no-card-tool', binary: 'no-card-tool', install: 'brew install no-card-tool' }],
+      {},
+    );
+    expect(index.by_slug['no-card-tool']).toBeDefined();
+    expect(index.by_slug['no-card-tool'].version).toBeUndefined();
+    expect(index.by_slug['no-card-tool'].anc_version).toBeUndefined();
+    expect(index.by_slug['no-card-tool'].scorecard_url).toBeUndefined();
+  });
+});
diff --git a/tests/score-discover-binary.test.ts b/tests/score-discover-binary.test.ts
index 643cb6b..9bc15e4 100644
--- a/tests/score-discover-binary.test.ts
+++ b/tests/score-discover-binary.test.ts
@@ -211,7 +211,14 @@ describe('discoverBinary — step 3 F1 tightening (repository-field match)', ()
     if (r.ok) expect(r.resolved_step).toBe('3-go');
   });
 
-  test('priority order: brew → crates → npm → pypi → go (brew wins when multiple hit)', async () => {
+  test('priority order: crates → npm → pypi → go → brew (sandbox-installable PMs first)', async () => {
+    // The sandbox image bounces brew (Linuxbrew non-viable on musl). If a
+    // tool has both a brew formula AND a working alternative (e.g.
+    // csvlens on brew AND on crates.io), picking brew sends the user to
+    // a guaranteed bounce when scoring was possible. Brew is now last so
+    // brew-only tools still resolve to brew (and bounce honestly with
+    // the brew formula name in the error), but tools with any other
+    // supported PM score successfully.
     const fetcher = mockFetcher({
       'https://api.github.com/repos/foo/bar/releases/latest': { body: { assets: [] } },
       'https://formulae.brew.sh/api/formula/bar.json': {
@@ -227,6 +234,25 @@ describe('discoverBinary — step 3 F1 tightening (repository-field match)', ()
     });
     const r = await discoverBinary({ owner: 'foo', repo: 'bar', hintsIndex: EMPTY_HINTS, fetcher });
     expect(r.ok).toBe(true);
+    if (r.ok) expect(r.resolved_step).toBe('3-crates');
+  });
+
+  test('brew wins only when no other distribution matches (last-resort priority)', async () => {
+    // No crates, npm, pypi, or go match — brew formula is the only
+    // hit. Discovery picks brew; the sandbox bounces it as install_unsupported
+    // with the formula name in the error.
+    const fetcher = mockFetcher({
+      'https://api.github.com/repos/foo/baz/releases/latest': { body: { assets: [] } },
+      'https://formulae.brew.sh/api/formula/baz.json': {
+        body: {
+          homepage: 'https://github.com/foo/baz',
+          urls: { stable: { url: 'https://github.com/foo/baz/archive/v1.tar.gz' } },
+        },
+      },
+      // No crates / npm / pypi / go responses — fetcher returns 404
+    });
+    const r = await discoverBinary({ owner: 'foo', repo: 'baz', hintsIndex: EMPTY_HINTS, fetcher });
+    expect(r.ok).toBe(true);
     if (r.ok) expect(r.resolved_step).toBe('3-brew');
   });
 });
@@ -294,3 +320,152 @@ describe('discoverBinary — chain miss', () => {
     }
   });
 });
+
+// ---------------------------------------------------------------------------
+// Fix 2 — parallel fan-out + priority pick + agreement diagnostics
+// ---------------------------------------------------------------------------
+
+describe('discoverBinary — parallel fan-out (Fix 2)', () => {
+  // Build a fetcher whose responses each take a known delay. The
+  // parallel fan-out asserts: total wall-clock for a chain miss is the
+  // MAX of the per-step delays, not the SUM. If a future refactor
+  // re-serializes the steps this test fails LOUDLY.
+  function delayedFetcher(table: Record<string, { delayMs: number; status?: number; body: unknown }>): typeof fetch {
+    return (async (input: string | URL | Request) => {
+      const url = typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url;
+      const response = table[url];
+      if (response) {
+        await new Promise((r) => setTimeout(r, response.delayMs));
+        const body = response.body;
+        const status = response.status ?? 200;
+        const ok = status >= 200 && status < 300;
+        return {
+          ok,
+          status,
+          json: async () => body,
+          text: async () => (typeof body === 'string' ? body : JSON.stringify(body)),
+        } as Response;
+      }
+      return { ok: false, status: 404, json: async () => ({}), text: async () => '' } as Response;
+    }) as unknown as typeof fetch;
+  }
+
+  test('steps 2/3/4 fire concurrently (total time ≈ max(step times), not sum)', async () => {
+    // Each step takes 200 ms. Serial fan-out would be 200 ms (releases)
+    // + 200 ms (5x parallel registries) + 200 ms (readme) = 600 ms.
+    // Parallel fan-out finishes in ~200 ms once all three steps settle.
+    // 400 ms threshold gives generous slack for test-runner jitter.
+    const delay = 200;
+    const fetcher = delayedFetcher({
+      'https://api.github.com/repos/foo/bar/releases/latest': { delayMs: delay, body: { assets: [] } },
+      'https://formulae.brew.sh/api/formula/bar.json': { delayMs: delay, status: 404, body: {} },
+      'https://crates.io/api/v1/crates/bar': { delayMs: delay, status: 404, body: {} },
+      'https://registry.npmjs.org/bar/latest': { delayMs: delay, status: 404, body: {} },
+      'https://pypi.org/pypi/bar/json': { delayMs: delay, status: 404, body: {} },
+      'https://proxy.golang.org/foo/bar/@latest': { delayMs: delay, status: 404, body: {} },
+      'https://raw.githubusercontent.com/foo/bar/HEAD/README.md': { delayMs: delay, status: 404, body: '' },
+      'https://raw.githubusercontent.com/foo/bar/main/README.md': { delayMs: delay, status: 404, body: '' },
+      'https://raw.githubusercontent.com/foo/bar/master/README.md': { delayMs: delay, status: 404, body: '' },
+    });
+    const start = Date.now();
+    const r = await discoverBinary({ owner: 'foo', repo: 'bar', hintsIndex: EMPTY_HINTS, fetcher });
+    const elapsed = Date.now() - start;
+    expect(r.ok).toBe(false);
+    // Each step internally parallelizes (step 3 uses Promise.all over 5
+    // registries; step 4 walks the README candidate list sequentially
+    // — 3 x 200 ms = 600 ms worst-case for step 4 alone). The fan-out
+    // assertion is: the OUTER chain ran the three steps concurrently,
+    // so the total stays under sum-of-outer-steps. Without the
+    // parallelization the README step's serial walk would stack on
+    // top of release + distributions for >= 1000 ms total.
+    expect(elapsed).toBeLessThan(900);
+  });
+
+  test('release-asset wins over registry on agreement (priority pick)', async () => {
+    // Both step 2 AND step 3 (crates) hit. Priority: release > registry.
+    const fetcher = mockFetcher({
+      'https://api.github.com/repos/foo/bar/releases/latest': {
+        body: {
+          assets: [{ name: 'bar-x86_64-unknown-linux-musl.tar.gz', browser_download_url: 'https://x/bar.tar.gz' }],
+        },
+      },
+      'https://crates.io/api/v1/crates/bar': {
+        body: { crate: { repository: 'https://github.com/foo/bar', max_stable_version: '1.0.0' } },
+      },
+      'https://crates.io/api/v1/crates/bar/1.0.0': { body: { version: { bin_names: ['bar'] } } },
+    });
+    const r = await discoverBinary({ owner: 'foo', repo: 'bar', hintsIndex: EMPTY_HINTS, fetcher });
+    expect(r.ok).toBe(true);
+    if (!r.ok) return;
+    expect(r.resolved_step).toBe('2-releases-asset');
+    expect(r.diagnostics?.winner).toBe('2-releases-asset');
+    // Loser list captures the OTHER source that also hit.
+    expect(r.diagnostics?.losers).toContain('3-crates');
+    // Agreement: both sources resolve to the same binary name (the
+    // repo name in this fixture). agreed_binary stays true.
+    expect(r.diagnostics?.agreed_binary).toBe(true);
+  });
+
+  test('registry wins over README (priority pick when no release hit)', async () => {
+    const fetcher = mockFetcher({
+      'https://api.github.com/repos/foo/bar/releases/latest': { body: { assets: [] } },
+      'https://crates.io/api/v1/crates/bar': {
+        body: { crate: { repository: 'https://github.com/foo/bar', max_stable_version: '1.0.0' } },
+      },
+      'https://crates.io/api/v1/crates/bar/1.0.0': { body: { version: { bin_names: ['bar'] } } },
+      'https://raw.githubusercontent.com/foo/bar/HEAD/README.md': {
+        body: '```\npip install bar\n```',
+      },
+    });
+    const r = await discoverBinary({ owner: 'foo', repo: 'bar', hintsIndex: EMPTY_HINTS, fetcher });
+    expect(r.ok).toBe(true);
+    if (!r.ok) return;
+    expect(r.resolved_step).toBe('3-crates');
+    expect(r.diagnostics?.losers).toContain('4-readme-parse');
+  });
+
+  test('hint hit short-circuits parallel fan-out (no network calls)', async () => {
+    // The hint lookup is in-memory and runs BEFORE the parallel
+    // fan-out. A hit must return before any HTTP call fires — this
+    // keeps the zero-cost lookup zero-cost.
+    let fetchCalls = 0;
+    const fetcher = (async () => {
+      fetchCalls++;
+      return { ok: false, status: 404, json: async () => ({}), text: async () => '' } as Response;
+    }) as unknown as typeof fetch;
+    const hints: DiscoveryHintsIndex = {
+      by_owner_repo: { 'foo/bar': { pm: 'pip', package: 'bar', binary: 'bar' } },
+    };
+    const r = await discoverBinary({ owner: 'foo', repo: 'bar', hintsIndex: hints, fetcher });
+    expect(r.ok).toBe(true);
+    expect(fetchCalls).toBe(0);
+  });
+
+  test('disagreement on binary name surfaces in diagnostics.agreed_binary=false', async () => {
+    // Release picks 'bar' (repo-name default). README parse for a
+    // build script that names a DIFFERENT package (in this case
+    // `bar-tools`) gets normalized via the substring guard, but the
+    // resolved spec.binary differs. agreed_binary fires false so logs
+    // can spot the cross-source mismatch.
+    const fetcher = mockFetcher({
+      'https://api.github.com/repos/foo/bar/releases/latest': {
+        body: {
+          assets: [{ name: 'bar-x86_64-unknown-linux-musl.tar.gz', browser_download_url: 'https://x/bar.tar.gz' }],
+        },
+      },
+      // README install says `pip install bar-tools` — the substring
+      // guard (bar ⊂ bar-tools) passes, so step 4 hits with a
+      // different binary name.
+      'https://raw.githubusercontent.com/foo/bar/HEAD/README.md': {
+        body: '```\npip install bar-tools\n```',
+      },
+    });
+    const r = await discoverBinary({ owner: 'foo', repo: 'bar', hintsIndex: EMPTY_HINTS, fetcher });
+    expect(r.ok).toBe(true);
+    if (!r.ok) return;
+    // Release wins on priority.
+    expect(r.resolved_step).toBe('2-releases-asset');
+    expect(r.diagnostics?.losers).toContain('4-readme-parse');
+    expect(r.diagnostics?.agreed_binary).toBe(false);
+  });
+});
diff --git a/tests/score-do-branch-clone.test.ts b/tests/score-do-branch-clone.test.ts
new file mode 100644
index 0000000..82f57e2
--- /dev/null
+++ b/tests/score-do-branch-clone.test.ts
@@ -0,0 +1,327 @@
+// Branch-scoped git-clone install path tests.
+//
+// The DO routes github-url-with-branch inputs to a pm: 'git-clone'
+// install spec that clones the repo at the requested ref and runs
+// `anc check <path>` against the source. The Sandbox SDK only exposes
+// `exec(command: string)` — no argv array — so command-string
+// composition + shellQuote is the trust boundary at exec time. These
+// tests pin:
+//
+//   - The clone command shape (--depth 1 --no-tags --single-branch
+//     --branch <branch> <url> <dest>)
+//   - shellQuote wraps every interpolated value (POSIX single-quote
+//     escape, so embedded `'` becomes `'\''`)
+//   - The DO refuses unsafe branch names BEFORE shellQuote runs
+//     (defense in depth — the regex catches structural metacharacters;
+//     shellQuote closes the escape)
+//   - `anc check <path>` is used instead of `anc check --command
+//     <binary>` for source-scoped scores
+//   - `which <binary>` gate is SKIPPED for git-clone (no binary lands
+//     on PATH; the cloned source is what gets scored)
+
+import { describe, expect, test } from 'bun:test';
+import type { GitCloneInstall, InstallSpec } from '../src/worker/score/discover-binary';
+import {
+  buildAncCheckSourceCmd,
+  buildGitCloneCommand,
+  type ContainerLike,
+  type ExecLike,
+  score,
+} from '../src/worker/score/sandbox-exec';
+
+// ---------------------------------------------------------------------------
+// Stub — mirrors the shape in score-do.test.ts so tests run offline.
+// ---------------------------------------------------------------------------
+
+type Call =
+  | { kind: 'setOutboundHandler'; name: string; params?: unknown }
+  | { kind: 'exec'; command: string; timeout?: number };
+
+type ExecResponder = (command: string) => ExecLike;
+
+const ANC_CHECK_OK = JSON.stringify({
+  spec_version: '0.4.0',
+  anc_version: '0.3.1',
+  tool: { name: 'qmd', version: '0.1.0' },
+  score: { value: 70 },
+});
+
+function defaultResponder(command: string): ExecLike {
+  // git clone — synthesize success without touching the network.
+  if (command.includes('git clone')) {
+    return { success: true, stdout: '', stderr: '' };
+  }
+  if (command === 'anc --version') {
+    return { success: true, stdout: 'anc 0.3.1\n', stderr: '' };
+  }
+  if (command.startsWith('anc check ')) {
+    return { success: true, stdout: ANC_CHECK_OK, stderr: '' };
+  }
+  return { success: true, stdout: '', stderr: '' };
+}
+
+function makeStub(responder: ExecResponder = defaultResponder): { stub: ContainerLike; calls: Call[] } {
+  const calls: Call[] = [];
+  const stub: ContainerLike = {
+    async setOutboundHandler<P = unknown>(name: string, params?: P): Promise<void> {
+      calls.push({ kind: 'setOutboundHandler', name, params });
+    },
+    async exec(command: string, options?: { timeout?: number }): Promise<ExecLike> {
+      calls.push({ kind: 'exec', command, timeout: options?.timeout });
+      return responder(command);
+    },
+  };
+  return { stub, calls };
+}
+
+const CLI_SPEC: GitCloneInstall = {
+  pm: 'git-clone',
+  owner: 'cli',
+  repo: 'cli',
+  branch: 'main',
+  binary: 'cli',
+};
+
+// ---------------------------------------------------------------------------
+// buildGitCloneCommand — command-shape unit tests
+// ---------------------------------------------------------------------------
+
+describe('buildGitCloneCommand — shape', () => {
+  test('emits `git clone --depth 1 --no-tags --single-branch --branch <branch> <url> <dest>`', () => {
+    const cmd = buildGitCloneCommand(CLI_SPEC);
+    expect(cmd).not.toBeNull();
+    // EXACT shape pin. A future relaxation that drops --depth 1 or
+    // --single-branch would slow every score by minutes and possibly
+    // bust the 60 s budget.
+    expect(cmd).toBe(
+      `( set -e; rm -rf '/tmp/anc-clone-target'; ` +
+        `git clone --depth 1 --no-tags --single-branch ` +
+        `--branch 'main' ` +
+        `'https://github.com/cli/cli.git' '/tmp/anc-clone-target' )`,
+    );
+  });
+
+  test('clean-rm of destination BEFORE clone (warm-DO re-run safety)', () => {
+    const cmd = buildGitCloneCommand(CLI_SPEC);
+    expect(cmd).not.toBeNull();
+    if (!cmd) return;
+    // rm -rf MUST run before git clone — otherwise the second request
+    // on a warm DO would collide with the prior clone's directory and
+    // `git clone` would refuse with "destination path already exists".
+    const rmIdx = cmd.indexOf('rm -rf');
+    const cloneIdx = cmd.indexOf('git clone ');
+    expect(rmIdx).toBeGreaterThanOrEqual(0);
+    expect(cloneIdx).toBeGreaterThan(rmIdx);
+  });
+
+  test('wraps the whole pipeline in a `( set -e; ... )` subshell', () => {
+    // set -e exits the subshell on failure, NOT the container's
+    // persistent shell session. Same invariant as the directInstall
+    // pipeline (sandbox-exec.ts).
+    const cmd = buildGitCloneCommand(CLI_SPEC);
+    expect(cmd).not.toBeNull();
+    if (!cmd) return;
+    expect(cmd.startsWith('( set -e;')).toBe(true);
+    expect(cmd.endsWith(' )')).toBe(true);
+  });
+
+  test('every interpolated value is single-quote-wrapped (POSIX shell escape)', () => {
+    const cmd = buildGitCloneCommand(CLI_SPEC);
+    expect(cmd).not.toBeNull();
+    if (!cmd) return;
+    expect(cmd).toContain("'main'"); // branch
+    expect(cmd).toContain("'https://github.com/cli/cli.git'"); // repo URL
+    expect(cmd).toContain("'/tmp/anc-clone-target'"); // dest
+  });
+
+  test('branch with `/` (feature/new-thing) shell-quotes intact', () => {
+    const spec: GitCloneInstall = { ...CLI_SPEC, branch: 'feature/new-thing' };
+    const cmd = buildGitCloneCommand(spec);
+    expect(cmd).not.toBeNull();
+    expect(cmd).toContain("--branch 'feature/new-thing'");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// buildGitCloneCommand — RED TEAM (defense in depth)
+// ---------------------------------------------------------------------------
+
+describe('buildGitCloneCommand — red team', () => {
+  test('rejects branch with `..` (path traversal) BEFORE interpolation — returns null', () => {
+    // validate.ts already rejects this at the Worker boundary; do.ts
+    // re-rejects at the DO boundary. buildGitCloneCommand is the THIRD
+    // defense: a future caller that bypasses both upstream layers
+    // still can't smuggle a traversal pattern into the exec command.
+    const spec: GitCloneInstall = { ...CLI_SPEC, branch: '../etc/passwd' };
+    expect(buildGitCloneCommand(spec)).toBeNull();
+  });
+
+  test('rejects shell metacharacters in branch name', () => {
+    const attempts = [';rm -rf /', '$(whoami)', '`whoami`', 'foo&&bar', 'foo|bar', 'foo>bar', '"q"', "'q'", 'foo bar'];
+    for (const branch of attempts) {
+      const spec: GitCloneInstall = { ...CLI_SPEC, branch };
+      expect(buildGitCloneCommand(spec), `expected null for branch: ${branch}`).toBeNull();
+    }
+  });
+
+  test('rejects empty branch', () => {
+    expect(buildGitCloneCommand({ ...CLI_SPEC, branch: '' })).toBeNull();
+  });
+
+  test('rejects over-long branch (>250 chars)', () => {
+    const branch = 'a'.repeat(251);
+    expect(buildGitCloneCommand({ ...CLI_SPEC, branch })).toBeNull();
+  });
+
+  test('rejects leading dot / trailing dot / leading slash / trailing slash branch', () => {
+    expect(buildGitCloneCommand({ ...CLI_SPEC, branch: '.main' })).toBeNull();
+    expect(buildGitCloneCommand({ ...CLI_SPEC, branch: 'main.' })).toBeNull();
+    expect(buildGitCloneCommand({ ...CLI_SPEC, branch: '/main' })).toBeNull();
+    expect(buildGitCloneCommand({ ...CLI_SPEC, branch: 'main/' })).toBeNull();
+  });
+
+  test('shellQuote escapes embedded single-quote even though validBranchName would reject it', () => {
+    // Belt-and-suspenders: if the regex layer EVER let a single quote
+    // through (e.g. a typo in the character class), shellQuote would
+    // STILL wrap and escape it. Construct a branch with a single
+    // quote and bypass validBranchName by patching the function?
+    // Easier: assert that shellQuote's behavior is preserved by
+    // running an internal test that takes a known unsafe character
+    // class and verifies the output is still single-quote-wrapped.
+    // This is a regression guard on the shellQuote dependency.
+    //
+    // Cannot construct an InstallSpec with `'` in the branch because
+    // validBranchName rejects upstream; documented here so the
+    // safety chain is clear.
+    expect(buildGitCloneCommand({ ...CLI_SPEC, branch: "evil'rm -rf /" })).toBeNull();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// buildAncCheckSourceCmd — source-path anc invocation
+// ---------------------------------------------------------------------------
+
+describe('buildAncCheckSourceCmd — source-path anc invocation', () => {
+  test('emits `anc check <path> --output json`', () => {
+    expect(buildAncCheckSourceCmd(CLI_SPEC, undefined)).toBe("anc check '/tmp/anc-clone-target' --output json");
+  });
+
+  test('appends `--audit-profile <profile>` when audit_profile present', () => {
+    expect(buildAncCheckSourceCmd(CLI_SPEC, 'cli-tool')).toBe(
+      "anc check '/tmp/anc-clone-target' --output json --audit-profile 'cli-tool'",
+    );
+  });
+
+  test('path is single-quote-wrapped (POSIX shell escape)', () => {
+    const cmd = buildAncCheckSourceCmd(CLI_SPEC, undefined);
+    expect(cmd).toContain("'/tmp/anc-clone-target'");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Orchestration: full score() flow with pm=git-clone
+// ---------------------------------------------------------------------------
+
+describe('score() — git-clone orchestration', () => {
+  test('runs the clone, skips `which` gate, runs `anc check <path>`', async () => {
+    const { stub, calls } = makeStub();
+    const result = await score(stub, CLI_SPEC);
+    expect(result.ok).toBe(true);
+
+    const execCalls = calls.filter((c) => c.kind === 'exec') as Array<Extract<Call, { kind: 'exec' }>>;
+    const cloneCall = execCalls.find((c) => c.command.includes('git clone'));
+    expect(cloneCall).toBeDefined();
+    // No `which <binary>` between install and Phase 2 lockdown — the
+    // git-clone path doesn't put a binary on PATH; the source is what
+    // gets checked.
+    const whichCall = execCalls.find((c) => c.command.startsWith('which '));
+    expect(whichCall).toBeUndefined();
+    // `anc check <path>` runs after the noHttp lockdown.
+    const ancCheck = execCalls.find((c) => c.command.startsWith('anc check '));
+    expect(ancCheck).toBeDefined();
+    expect(ancCheck?.command).toContain("'/tmp/anc-clone-target'");
+    expect(ancCheck?.command).not.toContain('--command');
+  });
+
+  test('two-phase egress holds: allowedInstall BEFORE clone, noHttp BEFORE anc check', async () => {
+    const { stub, calls } = makeStub();
+    await score(stub, CLI_SPEC);
+    const phase1 = calls.findIndex((c) => c.kind === 'setOutboundHandler' && c.name === 'allowedInstall');
+    const cloneExec = calls.findIndex((c) => c.kind === 'exec' && c.command.includes('git clone'));
+    const phase2 = calls.findIndex((c) => c.kind === 'setOutboundHandler' && c.name === 'noHttp');
+    const ancCheckExec = calls.findIndex((c) => c.kind === 'exec' && c.command.startsWith('anc check '));
+
+    expect(phase1).toBeGreaterThanOrEqual(0);
+    expect(cloneExec).toBeGreaterThan(phase1);
+    expect(phase2).toBeGreaterThan(cloneExec);
+    expect(ancCheckExec).toBeGreaterThan(phase2);
+  });
+
+  test('allowedInstall hosts include github.com + *.githubusercontent.com wildcard', async () => {
+    const { stub, calls } = makeStub();
+    await score(stub, CLI_SPEC);
+    const phase1 = calls.find((c) => c.kind === 'setOutboundHandler' && c.name === 'allowedInstall') as
+      | Extract<Call, { kind: 'setOutboundHandler' }>
+      | undefined;
+    expect(phase1).toBeDefined();
+    const params = phase1?.params as { allowedHostnames: string[] };
+    expect(params.allowedHostnames).toContain('github.com');
+    expect(params.allowedHostnames).toContain('*.githubusercontent.com');
+  });
+
+  test('clone failure → chain_resolved_install_failed with stderr captured', async () => {
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.includes('git clone')) {
+        return {
+          success: false,
+          stdout: '',
+          stderr: "fatal: Remote branch 'no-such-branch' not found",
+          exitCode: 128,
+        };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, { ...CLI_SPEC, branch: 'no-such-branch' });
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('chain_resolved_install_failed');
+    expect(result.details).toContain('not found');
+  });
+
+  test('unsafe branch (regex bypass via direct InstallSpec) → install_unsupported with pm=git-clone', async () => {
+    // Construct an InstallSpec directly with a branch that should
+    // never reach the orchestration. installCommandFor() returns null
+    // (via buildGitCloneCommand), which the score() flow maps to
+    // install_unsupported. No exec call should fire.
+    const { stub, calls } = makeStub();
+    const result = await score(stub, { ...CLI_SPEC, branch: '../etc/passwd' });
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('install_unsupported');
+    expect(result.details).toBe('pm=git-clone');
+    // No exec call — the bounce happens BEFORE the command is built.
+    const execCalls = calls.filter((c) => c.kind === 'exec');
+    expect(execCalls).toHaveLength(0);
+  });
+
+  test('happy-path scorecard returned with anc_version captured live', async () => {
+    const { stub } = makeStub();
+    const result = await score(stub, CLI_SPEC);
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    expect(result.value.anc_version).toBe('0.3.1');
+    expect(result.value.scorecard).toMatchObject({ tool: { name: 'qmd' } });
+  });
+});
+
+// ---------------------------------------------------------------------------
+// DO type-shape: the GitCloneInstall variant is part of the InstallSpec union.
+// ---------------------------------------------------------------------------
+
+describe('InstallSpec union — git-clone is a recognized variant', () => {
+  test('exhaustiveness — git-clone is a valid pm value', () => {
+    const spec: InstallSpec = { pm: 'git-clone', owner: 'a', repo: 'b', branch: 'main', binary: 'b' };
+    expect(spec.pm).toBe('git-clone');
+  });
+});
diff --git a/tests/score-do-brew-fallback.test.ts b/tests/score-do-brew-fallback.test.ts
new file mode 100644
index 0000000..bab7b00
--- /dev/null
+++ b/tests/score-do-brew-fallback.test.ts
@@ -0,0 +1,201 @@
+// Brew discovery-fallback tests for resolveSpec() in resolve-spec.ts.
+//
+// When a user pastes `brew install <pkg>`, resolveSpec fetches the
+// formula metadata from formulae.brew.sh, parses the homepage as a
+// github.com URL, and runs the same discoverBinary chain used for
+// github-url inputs. The brew-only bounce is intentionally
+// indistinguishable from a missing formula or a non-github homepage so
+// the user-facing CTA stays simple.
+//
+// 2026-05-20 move: pre-move this lived in do.ts and was invoked at the
+// DO boundary. Resolution now happens at the Worker tier; the function
+// signature is unchanged, only the file location moved.
+
+import { describe, expect, test } from 'bun:test';
+import { parseGithubOwnerRepo, resolveBrewFallback } from '../src/worker/score/resolve-spec';
+
+type FetchHandler = (url: string) => Response | Promise<Response>;
+
+function fakeFetcher(handler: FetchHandler): typeof fetch {
+  return (async (input: Request | string | URL, _init?: RequestInit) => {
+    const url = typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url;
+    return await handler(url);
+  }) as unknown as typeof fetch;
+}
+
+const EMPTY_HINTS = { by_owner_repo: {} };
+
+function ok<T>(body: T): Response {
+  return new Response(JSON.stringify(body), { status: 200, headers: { 'content-type': 'application/json' } });
+}
+
+function notFound(): Response {
+  return new Response('', { status: 404 });
+}
+
+describe('resolveBrewFallback — happy paths', () => {
+  test('formula with crates.io distribution → resolves to pm=cargo-binstall', async () => {
+    const fetcher = fakeFetcher((url) => {
+      if (url.includes('formulae.brew.sh/api/formula/ripgrep.json')) {
+        return ok({ homepage: 'https://github.com/BurntSushi/ripgrep' });
+      }
+      if (url.includes('api.github.com/repos/BurntSushi/ripgrep/releases/latest')) {
+        // Force discovery past Step 2 so Step 3 distributions decide.
+        return notFound();
+      }
+      if (url === 'https://crates.io/api/v1/crates/ripgrep') {
+        return ok({ crate: { repository: 'https://github.com/BurntSushi/ripgrep', max_stable_version: '14.0.0' } });
+      }
+      if (url === 'https://crates.io/api/v1/crates/ripgrep/14.0.0') {
+        return ok({ version: { bin_names: ['rg'] } });
+      }
+      // npm / pypi / go misses — return 404 so they don't compete.
+      return notFound();
+    });
+    const result = await resolveBrewFallback('ripgrep', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    expect(result.value.pm).toBe('cargo-binstall');
+  });
+
+  test('formula with GitHub release asset → resolves to pm=direct via Step 2', async () => {
+    const fetcher = fakeFetcher((url) => {
+      if (url.includes('formulae.brew.sh/api/formula/csvlens.json')) {
+        return ok({ homepage: 'https://github.com/YS-L/csvlens' });
+      }
+      if (url.includes('api.github.com/repos/YS-L/csvlens/releases/latest')) {
+        return ok({
+          assets: [
+            {
+              name: 'csvlens-x86_64-unknown-linux-musl.tar.xz',
+              browser_download_url: 'https://example.com/csvlens-x86_64-unknown-linux-musl.tar.xz',
+            },
+          ],
+        });
+      }
+      return notFound();
+    });
+    const result = await resolveBrewFallback('csvlens', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    expect(result.value.pm).toBe('direct');
+    if (result.value.pm !== 'direct') return;
+    expect(result.value.url).toContain('.tar.xz');
+  });
+});
+
+describe('resolveBrewFallback — bounce paths', () => {
+  test('formula 404 on formulae.brew.sh → install_unsupported pm=brew_only', async () => {
+    const fetcher = fakeFetcher(() => notFound());
+    const result = await resolveBrewFallback('definitely-not-a-formula', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('install_unsupported');
+    expect(result.details).toBe('pm=brew_only');
+  });
+
+  test('formula homepage is non-github → install_unsupported pm=brew_only', async () => {
+    const fetcher = fakeFetcher((url) => {
+      if (url.includes('formulae.brew.sh/api/formula/exotic.json')) {
+        return ok({ homepage: 'https://exotic.example/tool' });
+      }
+      return notFound();
+    });
+    const result = await resolveBrewFallback('exotic', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('install_unsupported');
+    expect(result.details).toBe('pm=brew_only');
+  });
+
+  test('formula with GitHub homepage but no other-PM distribution → install_unsupported pm=brew_only', async () => {
+    const fetcher = fakeFetcher((url) => {
+      if (url.includes('formulae.brew.sh/api/formula/brew-only-tool.json')) {
+        return ok({
+          homepage: 'https://github.com/owner/brew-only-tool',
+          urls: { stable: { url: 'https://github.com/owner/brew-only-tool/releases/v1.tar.gz' } },
+        });
+      }
+      // Every other registry misses. Note: the discoverBinary chain
+      // still queries formulae.brew.sh as part of Step 3, so we have to
+      // serve a brew-tight match here too — and the resolveBrewFallback
+      // wrapper rejects pm=brew explicitly, which is what produces the
+      // brew_only bounce.
+      return notFound();
+    });
+    const result = await resolveBrewFallback('brew-only-tool', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('install_unsupported');
+    expect(result.details).toBe('pm=brew_only');
+  });
+
+  test('formula missing homepage field → install_unsupported pm=brew_only', async () => {
+    const fetcher = fakeFetcher((url) => {
+      if (url.includes('formulae.brew.sh/api/formula/no-homepage.json')) {
+        return ok({});
+      }
+      return notFound();
+    });
+    const result = await resolveBrewFallback('no-homepage', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('install_unsupported');
+    expect(result.details).toBe('pm=brew_only');
+  });
+
+  test('formula API throws → install_unsupported pm=brew_only', async () => {
+    const fetcher = (async () => {
+      throw new Error('network refused');
+    }) as unknown as typeof fetch;
+    const result = await resolveBrewFallback('anything', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('install_unsupported');
+    expect(result.details).toBe('pm=brew_only');
+  });
+});
+
+describe('parseGithubOwnerRepo', () => {
+  test('repo-root URL → {owner, repo}', () => {
+    expect(parseGithubOwnerRepo('https://github.com/BurntSushi/ripgrep')).toEqual({
+      owner: 'BurntSushi',
+      repo: 'ripgrep',
+    });
+  });
+
+  test('repo URL with .git suffix → strips suffix', () => {
+    expect(parseGithubOwnerRepo('https://github.com/BurntSushi/ripgrep.git')).toEqual({
+      owner: 'BurntSushi',
+      repo: 'ripgrep',
+    });
+  });
+
+  test('repo URL with subpath → still returns repo root', () => {
+    // A brew formula's homepage SHOULD be the repo root, but some
+    // formulae point at a docs subpath or a /releases page. The parser
+    // takes the first two segments — best-effort recovery rather than
+    // a strict reject — because the downstream discoverBinary call
+    // does its own owner/repo validation.
+    expect(parseGithubOwnerRepo('https://github.com/owner/repo/tree/main')).toEqual({
+      owner: 'owner',
+      repo: 'repo',
+    });
+  });
+
+  test('non-github host → null', () => {
+    expect(parseGithubOwnerRepo('https://gitlab.com/owner/repo')).toBeNull();
+  });
+
+  test('unparseable URL → null', () => {
+    expect(parseGithubOwnerRepo('not a url')).toBeNull();
+  });
+
+  test('undefined → null', () => {
+    expect(parseGithubOwnerRepo(undefined)).toBeNull();
+  });
+
+  test('github.com with only owner segment → null', () => {
+    expect(parseGithubOwnerRepo('https://github.com/owner')).toBeNull();
+  });
+});
diff --git a/tests/score-do-cache-write.test.ts b/tests/score-do-cache-write.test.ts
new file mode 100644
index 0000000..ae07e41
--- /dev/null
+++ b/tests/score-do-cache-write.test.ts
@@ -0,0 +1,187 @@
+// DO-side R2 cache-write contract.
+//
+// Plan U7 (docs/plans/2026-04-28-002-feat-live-scoring-cf-sandbox-plan.md
+// "U7 Approach", post-success cache write bullet). After a successful
+// `Sandbox.score()`, the DO writes to SCORE_CACHE so the next request
+// for the same binary short-circuits at the handler's cache tier.
+//
+// The cache write fires inside `Sandbox.fetch()` via the exported
+// `writeCacheBestEffort()` helper. Testing that helper directly pins
+// the same contract without the workerd-shim cost of instantiating a
+// Sandbox class. The helper carries every precondition (binding present,
+// tool version extractable) and every failure-handling guarantee
+// (R2 write failure logged but not surfaced) that fetch() relies on.
+
+import { describe, expect, test } from 'bun:test';
+import type { InstallSpec } from '../src/worker/score/discover-binary';
+import { extractToolVersion, type ScoreSandboxEnv, writeCacheBestEffort } from '../src/worker/score/do';
+
+// ---------------------------------------------------------------------------
+// R2 stub mirroring the cache.ts test stub
+// ---------------------------------------------------------------------------
+
+type Recorded = { key: string; value: string };
+
+function makeR2Stub(opts: { throwOnPut?: boolean } = {}) {
+  const writes: Recorded[] = [];
+  const env: ScoreSandboxEnv = {
+    ASSETS: { fetch: async () => new Response('not used') } as unknown as Fetcher,
+    SCORE_CACHE: {
+      async put(key: string, value: unknown) {
+        if (opts.throwOnPut) throw new Error('r2_put_failed');
+        writes.push({ key, value: typeof value === 'string' ? value : String(value) });
+      },
+      async get() {
+        return null;
+      },
+      async delete() {
+        // no-op for write tests
+      },
+    } as unknown as R2Bucket,
+  };
+  return { env, writes };
+}
+
+const SPEC: InstallSpec = { pm: 'npm', package: 'cowsay', binary: 'cowsay' };
+
+const SCORECARD_WITH_VERSION = {
+  schema_version: '0.5',
+  tool: { name: 'cowsay', version: '1.6.0' },
+  score: { value: 88 },
+};
+
+// ---------------------------------------------------------------------------
+// extractToolVersion
+// ---------------------------------------------------------------------------
+
+describe('extractToolVersion', () => {
+  test('returns scorecard.tool.version when present', () => {
+    expect(extractToolVersion(SCORECARD_WITH_VERSION)).toBe('1.6.0');
+  });
+
+  test('null scorecard → null', () => {
+    expect(extractToolVersion(null)).toBeNull();
+  });
+
+  test('missing tool field → null', () => {
+    expect(extractToolVersion({ schema_version: '0.5' })).toBeNull();
+  });
+
+  test('missing tool.version field → null', () => {
+    expect(extractToolVersion({ tool: { name: 'cowsay' } })).toBeNull();
+  });
+
+  test('empty-string tool.version → null (refusal-to-cache-half-state precondition)', () => {
+    expect(extractToolVersion({ tool: { name: 'cowsay', version: '' } })).toBeNull();
+  });
+
+  test('non-string tool.version → null', () => {
+    expect(extractToolVersion({ tool: { version: 1 } })).toBeNull();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// writeCacheBestEffort — precondition guards
+// ---------------------------------------------------------------------------
+
+describe('writeCacheBestEffort — preconditions', () => {
+  test('no SCORE_CACHE binding → skips write silently (no throw, no log-as-error)', async () => {
+    const env: ScoreSandboxEnv = {
+      ASSETS: { fetch: async () => new Response('') } as unknown as Fetcher,
+      // SCORE_CACHE intentionally absent — matches the optional binding
+      // shape on ScoreSandboxEnv (DO test envs without R2 wired up).
+    };
+    await writeCacheBestEffort(env, SPEC, { scorecard: SCORECARD_WITH_VERSION, anc_version: '0.3.1' });
+    // No assertion possible on the side-effect; the contract is "does
+    // not throw and does not crash". Reaching the next line is the test.
+  });
+
+  test('scorecard missing tool.version → skips write (refusal-to-cache-half-state)', async () => {
+    const { env, writes } = makeR2Stub();
+    await writeCacheBestEffort(env, SPEC, {
+      scorecard: { schema_version: '0.5', tool: { name: 'cowsay' } },
+      anc_version: '0.3.1',
+    });
+    expect(writes).toHaveLength(0);
+  });
+
+  test('null scorecard → skips write', async () => {
+    const { env, writes } = makeR2Stub();
+    await writeCacheBestEffort(env, SPEC, { scorecard: null, anc_version: '0.3.1' });
+    expect(writes).toHaveLength(0);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// writeCacheBestEffort — happy path
+// ---------------------------------------------------------------------------
+
+describe('writeCacheBestEffort — happy path', () => {
+  test('writes the canonical scores/{binary}/{SPEC_VERSION}.json key', async () => {
+    const { env, writes } = makeR2Stub();
+    await writeCacheBestEffort(env, SPEC, { scorecard: SCORECARD_WITH_VERSION, anc_version: '0.3.1' });
+    expect(writes).toHaveLength(1);
+    // SPEC_VERSION as the partition slot — handoff Decision 2 + gotcha
+    // 3. Currently 0.4.0; if it bumps, update this expectation.
+    expect(writes[0].key).toBe('scores/cowsay/0.4.0.json');
+  });
+
+  test('payload carries spec_version, anc_version, tool_version, scorecard', async () => {
+    const { env, writes } = makeR2Stub();
+    await writeCacheBestEffort(env, SPEC, { scorecard: SCORECARD_WITH_VERSION, anc_version: '0.3.1' });
+    expect(writes).toHaveLength(1);
+    const parsed = JSON.parse(writes[0].value) as {
+      spec_version: string;
+      anc_version: string;
+      tool_version: string;
+      scorecard: { tool: { name: string } };
+    };
+    expect(parsed.spec_version).toBe('0.4.0');
+    expect(parsed.anc_version).toBe('0.3.1');
+    expect(parsed.tool_version).toBe('1.6.0');
+    expect(parsed.scorecard.tool.name).toBe('cowsay');
+  });
+
+  test('different binaries write to different cache keys (no aliasing)', async () => {
+    const { env, writes } = makeR2Stub();
+    await writeCacheBestEffort(env, SPEC, { scorecard: SCORECARD_WITH_VERSION, anc_version: '0.3.1' });
+    await writeCacheBestEffort(
+      env,
+      { pm: 'cargo-binstall', package: 'ripgrep', binary: 'rg' },
+      { scorecard: { tool: { name: 'ripgrep', version: '15.1.0' } }, anc_version: '0.3.1' },
+    );
+    expect(writes.map((w) => w.key)).toEqual(['scores/cowsay/0.4.0.json', 'scores/rg/0.4.0.json']);
+  });
+
+  test('parser-driven binary derivation does not alias to curated slug (cargo binstall ripgrep → scores/ripgrep/...)', async () => {
+    // parse-install.ts maps `cargo binstall ripgrep` to binary='ripgrep'
+    // (package name), NOT to the registry's curated 'rg'. So an
+    // install-command POST writes under `scores/ripgrep/...` while a
+    // curated-registry POST for slug=ripgrep would (if it were live-
+    // scored, which it isn't because the registry path short-circuits)
+    // write under `scores/rg/...`. The two never alias. This pin
+    // captures the design choice so a future parser change that
+    // "normalizes" package→binary surfaces here.
+    const { env, writes } = makeR2Stub();
+    await writeCacheBestEffort(
+      env,
+      { pm: 'cargo-binstall', package: 'ripgrep', binary: 'ripgrep' },
+      { scorecard: { tool: { name: 'ripgrep', version: '15.1.0' } }, anc_version: '0.3.1' },
+    );
+    expect(writes[0].key).toBe('scores/ripgrep/0.4.0.json');
+    expect(writes[0].key).not.toBe('scores/rg/0.4.0.json');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// writeCacheBestEffort — failure isolation
+// ---------------------------------------------------------------------------
+
+describe('writeCacheBestEffort — failure isolation', () => {
+  test('R2 put failure is swallowed (best-effort write contract)', async () => {
+    const { env } = makeR2Stub({ throwOnPut: true });
+    // Must not throw — the caller (Sandbox.fetch) MUST return the user's
+    // score regardless of whether the cache write landed.
+    await writeCacheBestEffort(env, SPEC, { scorecard: SCORECARD_WITH_VERSION, anc_version: '0.3.1' });
+  });
+});
diff --git a/tests/score-do-go-fallback.test.ts b/tests/score-do-go-fallback.test.ts
new file mode 100644
index 0000000..b6530eb
--- /dev/null
+++ b/tests/score-do-go-fallback.test.ts
@@ -0,0 +1,152 @@
+// Go discovery-fallback tests for resolveSpec() in resolve-spec.ts.
+//
+// `go install <module>@latest` would compile from source on the sandbox,
+// violating the binary-only premise. resolveSpec redirects through the
+// discovery chain: a module path of the form `github.com/<owner>/<repo>`
+// runs through discoverBinary so a GitHub Releases asset substitutes
+// for the compile. Non-github modules bounce as go_no_binary.
+//
+// 2026-05-20 move: pre-move this lived in do.ts and was invoked at the
+// DO boundary. Resolution now happens at the Worker tier; the function
+// signature is unchanged, only the file location moved.
+
+import { describe, expect, test } from 'bun:test';
+import { resolveGoFallback } from '../src/worker/score/resolve-spec';
+
+type FetchHandler = (url: string) => Response | Promise<Response>;
+
+function fakeFetcher(handler: FetchHandler): typeof fetch {
+  return (async (input: Request | string | URL, _init?: RequestInit) => {
+    const url = typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url;
+    return await handler(url);
+  }) as unknown as typeof fetch;
+}
+
+const EMPTY_HINTS = { by_owner_repo: {} };
+
+function ok<T>(body: T): Response {
+  return new Response(JSON.stringify(body), { status: 200, headers: { 'content-type': 'application/json' } });
+}
+
+function notFound(): Response {
+  return new Response('', { status: 404 });
+}
+
+describe('resolveGoFallback — happy paths', () => {
+  test('github.com module with release asset → resolves to pm=direct', async () => {
+    const fetcher = fakeFetcher((url) => {
+      if (url.includes('api.github.com/repos/charmbracelet/glow/releases/latest')) {
+        return ok({
+          assets: [
+            {
+              name: 'glow_2.1.2_Linux_x86_64.tar.gz',
+              browser_download_url: 'https://example.com/glow_2.1.2_Linux_x86_64.tar.gz',
+            },
+          ],
+        });
+      }
+      return notFound();
+    });
+    const result = await resolveGoFallback('github.com/charmbracelet/glow', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    expect(result.value.pm).toBe('direct');
+  });
+
+  test('github.com module subpath (cmd/foo) is normalized to repo root', async () => {
+    // `go install github.com/owner/repo/cmd/foo@latest` parses to
+    // package=`github.com/owner/repo/cmd/foo`. The fallback strips
+    // subpaths so the GitHub release for the repo applies.
+    const fetcher = fakeFetcher((url) => {
+      if (url.includes('api.github.com/repos/owner/repo/releases/latest')) {
+        return ok({
+          assets: [{ name: 'tool-linux-x86_64.tar.gz', browser_download_url: 'https://example.com/x.tar.gz' }],
+        });
+      }
+      return notFound();
+    });
+    const result = await resolveGoFallback('github.com/owner/repo/cmd/foo', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(true);
+  });
+
+  test('github.com module with crates.io alternative → resolves via Step 3', async () => {
+    // Defensive: if a Go module's GitHub repo ALSO ships a crate
+    // with a binary, discoverBinary picks the cargo path. The Go
+    // fallback accepts any non-go resolution.
+    const fetcher = fakeFetcher((url) => {
+      if (url.includes('api.github.com/repos/BurntSushi/ripgrep/releases/latest')) return notFound();
+      if (url === 'https://crates.io/api/v1/crates/ripgrep') {
+        return ok({
+          crate: { repository: 'https://github.com/BurntSushi/ripgrep', max_stable_version: '14.0.0' },
+        });
+      }
+      if (url === 'https://crates.io/api/v1/crates/ripgrep/14.0.0') {
+        return ok({ version: { bin_names: ['rg'] } });
+      }
+      return notFound();
+    });
+    const result = await resolveGoFallback('github.com/BurntSushi/ripgrep', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    expect(result.value.pm).toBe('cargo-binstall');
+  });
+});
+
+describe('resolveGoFallback — bounce paths', () => {
+  test('non-github module (rsc.io/quote) → install_unsupported pm=go_no_binary', async () => {
+    const fetcher = fakeFetcher(() => notFound());
+    const result = await resolveGoFallback('rsc.io/quote/v3', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('install_unsupported');
+    expect(result.details).toBe('pm=go_no_binary');
+  });
+
+  test('golang.org/x/... module → install_unsupported pm=go_no_binary', async () => {
+    const fetcher = fakeFetcher(() => notFound());
+    const result = await resolveGoFallback('golang.org/x/tools/cmd/godoc', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('install_unsupported');
+    expect(result.details).toBe('pm=go_no_binary');
+  });
+
+  test('github.com module with no release binary → install_unsupported pm=go_no_binary', async () => {
+    // The repo exists on github but ships no GitHub release, no
+    // crates / npm / pypi alternative, no README-parseable install
+    // command. discoverBinary returns chain_no_resolve.
+    const fetcher = fakeFetcher(() => notFound());
+    const result = await resolveGoFallback('github.com/no-binary/tool', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('install_unsupported');
+    expect(result.details).toBe('pm=go_no_binary');
+  });
+
+  test('module path with only two segments (github.com/owner) → install_unsupported', async () => {
+    // Defensive: a malformed module path with no repo segment
+    // bounces fast rather than calling discoverBinary with an empty
+    // repo name.
+    const fetcher = fakeFetcher(() => notFound());
+    const result = await resolveGoFallback('github.com/owner', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('install_unsupported');
+    expect(result.details).toBe('pm=go_no_binary');
+  });
+
+  test('module path with @version suffix is stripped before parsing', async () => {
+    // parse-install normally strips @version, but the fallback is
+    // defensive against callers that don't.
+    const fetcher = fakeFetcher((url) => {
+      if (url.includes('api.github.com/repos/charmbracelet/glow/releases/latest')) {
+        return ok({
+          assets: [{ name: 'glow-linux-x86_64.tar.gz', browser_download_url: 'https://example.com/g.tar.gz' }],
+        });
+      }
+      return notFound();
+    });
+    const result = await resolveGoFallback('github.com/charmbracelet/glow@v1.5.1', EMPTY_HINTS, fetcher);
+    expect(result.ok).toBe(true);
+  });
+});
diff --git a/tests/score-do.test.ts b/tests/score-do.test.ts
new file mode 100644
index 0000000..aab08b5
--- /dev/null
+++ b/tests/score-do.test.ts
@@ -0,0 +1,1132 @@
+// Sandbox DO + two-phase egress orchestration tests.
+//
+// Covers three MUST-hold scenarios for the egress-handler contract:
+//
+//   (a) Sandbox.outboundHandlers static map has both `allowedInstall`
+//       and `noHttp` keys BEFORE any setOutboundHandler call runs.
+//       Catches misnamed-key regressions that would silently degrade
+//       egress policy.
+//
+//   (b) Two-phase egress order: setOutboundHandler('allowedInstall', ...)
+//       fires BEFORE exec(installCmd), AND setOutboundHandler('noHttp')
+//       fires BEFORE exec('anc check ...'). Asserted via a call log on a
+//       hand-rolled Container-like stub. This is the load-bearing
+//       security invariant for R7.
+//
+//   (c) Per-request handler log shape: each invocation emits
+//       `{phase, host, allowed|blocked}` so attempted-but-blocked egress
+//       surfaces in Workers Logs (the rationale for Pattern Y over the
+//       simpler static-allowlist Pattern X).
+//
+// Plus happy-path + every install-table branch + each bounce class.
+
+import { afterEach, beforeEach, describe, expect, test } from 'bun:test';
+import type { InstallSpec } from '../src/worker/score/discover-binary';
+import { handlers, Sandbox } from '../src/worker/score/do';
+import { type ContainerLike, type ExecLike, score } from '../src/worker/score/sandbox-exec';
+
+// ---------------------------------------------------------------------------
+// Stub Sandbox — records every setOutboundHandler + exec call.
+// ---------------------------------------------------------------------------
+
+type Call =
+  | { kind: 'setOutboundHandler'; name: string; params?: unknown }
+  | { kind: 'exec'; command: string; timeout?: number };
+
+type ExecResponder = (command: string) => ExecLike;
+
+function makeStub(responder: ExecResponder = defaultResponder): { stub: ContainerLike; calls: Call[] } {
+  const calls: Call[] = [];
+  const stub: ContainerLike = {
+    async setOutboundHandler<P = unknown>(name: string, params?: P): Promise<void> {
+      calls.push({ kind: 'setOutboundHandler', name, params });
+    },
+    async exec(command: string, options?: { timeout?: number }): Promise<ExecLike> {
+      calls.push({ kind: 'exec', command, timeout: options?.timeout });
+      return responder(command);
+    },
+  };
+  return { stub, calls };
+}
+
+const ANC_CHECK_OK = JSON.stringify({
+  spec_version: '0.4.0',
+  anc_version: '0.3.1',
+  tool: { name: 'ripgrep', version: '14.1.0' },
+  score: { value: 87 },
+});
+
+function defaultResponder(command: string): ExecLike {
+  if (command.startsWith('which ')) {
+    return { success: true, stdout: '/usr/local/bin/rg\n', stderr: '' };
+  }
+  if (command === 'anc --version') {
+    return { success: true, stdout: 'anc 0.3.1\n', stderr: '' };
+  }
+  if (command.startsWith('anc check ')) {
+    return { success: true, stdout: ANC_CHECK_OK, stderr: '' };
+  }
+  // install command — default success
+  return { success: true, stdout: '', stderr: '' };
+}
+
+const CARGO_SPEC: InstallSpec = { pm: 'cargo-binstall', package: 'ripgrep', binary: 'rg' };
+
+// ---------------------------------------------------------------------------
+// (a) Static outboundHandlers map presence
+// ---------------------------------------------------------------------------
+
+describe('Sandbox.outboundHandlers — static map presence (test scenario a)', () => {
+  test('declares both allowedInstall and noHttp BEFORE any setOutboundHandler call', () => {
+    const map = Sandbox.outboundHandlers;
+    expect(map).toBeDefined();
+    expect(typeof map?.allowedInstall).toBe('function');
+    expect(typeof map?.noHttp).toBe('function');
+  });
+
+  test('handler keys match the names sandbox-exec.ts references at runtime', () => {
+    // Defends against the silent-degrade class: if someone renames a
+    // handler in do.ts without updating sandbox-exec.ts (or vice versa),
+    // setOutboundHandler('name') resolves to undefined and the SDK
+    // falls back to default egress.
+    const expected = ['allowedInstall', 'noHttp'];
+    const actual = Object.keys(Sandbox.outboundHandlers ?? {}).sort();
+    expect(actual).toEqual(expected.sort());
+  });
+});
+
+// ---------------------------------------------------------------------------
+// (b) Two-phase egress ordering — setOutbound BEFORE exec
+// ---------------------------------------------------------------------------
+
+describe('sandbox-exec.score() — two-phase egress ordering (test scenario b)', () => {
+  test("setOutboundHandler('allowedInstall') fires BEFORE exec(installCmd)", async () => {
+    const { stub, calls } = makeStub();
+    await score(stub, CARGO_SPEC);
+    const phase1 = calls.findIndex((c) => c.kind === 'setOutboundHandler' && c.name === 'allowedInstall');
+    const installExec = calls.findIndex((c) => c.kind === 'exec' && c.command.startsWith('cargo-binstall '));
+    expect(phase1).toBeGreaterThanOrEqual(0);
+    expect(installExec).toBeGreaterThan(phase1);
+  });
+
+  test("setOutboundHandler('noHttp') fires BEFORE exec('anc check ...')", async () => {
+    const { stub, calls } = makeStub();
+    await score(stub, CARGO_SPEC);
+    const phase2 = calls.findIndex((c) => c.kind === 'setOutboundHandler' && c.name === 'noHttp');
+    const ancCheckExec = calls.findIndex((c) => c.kind === 'exec' && c.command.startsWith('anc check '));
+    expect(phase2).toBeGreaterThanOrEqual(0);
+    expect(ancCheckExec).toBeGreaterThan(phase2);
+  });
+
+  test('Phase 1 setOutboundHandler carries the install host allowlist via params', async () => {
+    const { stub, calls } = makeStub();
+    await score(stub, CARGO_SPEC);
+    const phase1 = calls.find((c) => c.kind === 'setOutboundHandler' && c.name === 'allowedInstall');
+    expect(phase1).toBeDefined();
+    const params = (phase1 as Extract<Call, { kind: 'setOutboundHandler' }>).params as { allowedHostnames: string[] };
+    expect(params.allowedHostnames).toContain('crates.io');
+    expect(params.allowedHostnames).toContain('static.crates.io');
+  });
+
+  test('noHttp call has no params (catch-all 403)', async () => {
+    const { stub, calls } = makeStub();
+    await score(stub, CARGO_SPEC);
+    const phase2 = calls.find((c) => c.kind === 'setOutboundHandler' && c.name === 'noHttp');
+    expect(phase2).toBeDefined();
+    expect((phase2 as Extract<Call, { kind: 'setOutboundHandler' }>).params).toBeUndefined();
+  });
+
+  test('every Phase 2 exec runs AFTER the noHttp swap (no install command between)', async () => {
+    const { stub, calls } = makeStub();
+    await score(stub, CARGO_SPEC);
+    const phase2Idx = calls.findIndex((c) => c.kind === 'setOutboundHandler' && c.name === 'noHttp');
+    // After noHttp swap, the only execs should be `anc --version` and `anc check ...`
+    const afterPhase2 = calls.slice(phase2Idx + 1).filter((c) => c.kind === 'exec') as Array<
+      Extract<Call, { kind: 'exec' }>
+    >;
+    for (const exec of afterPhase2) {
+      expect(
+        exec.command === 'anc --version' || exec.command.startsWith('anc check '),
+        `unexpected exec under noHttp egress: ${exec.command}`,
+      ).toBe(true);
+    }
+  });
+});
+
+describe('allowedInstall handler — wildcard hostname matcher', () => {
+  // GitHub moved release assets from objects.githubusercontent.com to
+  // release-assets.githubusercontent.com mid-2024; may shift again.
+  // Allowlist entries supporting `*.githubusercontent.com` cover the
+  // moving target without per-CDN-host churn. Defends the matcher
+  // semantics so the wildcard never accidentally widens (a regex bug
+  // that matched `evil.example.com.attacker.tld` as `*.com` would be
+  // catastrophic).
+  test('exact hostname match still works', async () => {
+    const captured: string[] = [];
+    const orig = console.log;
+    console.log = (m: string) => captured.push(m);
+    try {
+      const resp = await handlers.allowedInstall(new Request('https://crates.io/api/v1/crates/ripgrep'), {} as never, {
+        containerId: 'x',
+        className: 'Sandbox',
+        params: { allowedHostnames: ['crates.io'] },
+      });
+      expect(resp.status).not.toBe(403); // would 403 only if blocked
+    } finally {
+      console.log = orig;
+    }
+    expect(JSON.parse(captured[0]).allowed).toBe(true);
+  });
+
+  test('*.githubusercontent.com matches release-assets, objects, raw, codeload subdomains', async () => {
+    const captured: string[] = [];
+    const orig = console.log;
+    console.log = (m: string) => captured.push(m);
+    const allowlist = ['*.githubusercontent.com'];
+    const subs = [
+      'objects.githubusercontent.com',
+      'release-assets.githubusercontent.com',
+      'raw.githubusercontent.com',
+      'codeload.githubusercontent.com',
+    ];
+    try {
+      for (const sub of subs) {
+        await handlers.allowedInstall(new Request(`https://${sub}/foo`), {} as never, {
+          containerId: 'x',
+          className: 'Sandbox',
+          params: { allowedHostnames: allowlist },
+        });
+      }
+    } finally {
+      console.log = orig;
+    }
+    expect(captured.length).toBe(subs.length);
+    for (const line of captured) {
+      expect(JSON.parse(line).allowed).toBe(true);
+    }
+  });
+
+  test('*.githubusercontent.com rejects evil.com.githubusercontent.com.attacker.tld (no suffix-extension attack)', async () => {
+    const captured: string[] = [];
+    const orig = console.log;
+    console.log = (m: string) => captured.push(m);
+    try {
+      const resp = await handlers.allowedInstall(
+        new Request('https://githubusercontent.com.attacker.tld/payload'),
+        {} as never,
+        { containerId: 'x', className: 'Sandbox', params: { allowedHostnames: ['*.githubusercontent.com'] } },
+      );
+      expect(resp.status).toBe(403);
+    } finally {
+      console.log = orig;
+    }
+    expect(JSON.parse(captured[0]).allowed).toBe(false);
+  });
+
+  test('*.githubusercontent.com does NOT match bare githubusercontent.com (apex must be explicit)', async () => {
+    // Defensive: the wildcard is for SUBdomains only. Bare apex hits
+    // would surprise an operator who allowlisted the wildcard expecting
+    // CDN coverage. If apex coverage is needed, add it explicitly.
+    const captured: string[] = [];
+    const orig = console.log;
+    console.log = (m: string) => captured.push(m);
+    try {
+      const resp = await handlers.allowedInstall(new Request('https://githubusercontent.com/foo'), {} as never, {
+        containerId: 'x',
+        className: 'Sandbox',
+        params: { allowedHostnames: ['*.githubusercontent.com'] },
+      });
+      expect(resp.status).toBe(403);
+    } finally {
+      console.log = orig;
+    }
+    expect(JSON.parse(captured[0]).allowed).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// (c) Per-request handler log shape
+// ---------------------------------------------------------------------------
+
+describe('Outbound handlers — per-request log shape (test scenario c)', () => {
+  const originalLog = console.log;
+  let captured: string[] = [];
+
+  beforeEach(() => {
+    captured = [];
+    console.log = (...args: unknown[]) => {
+      captured.push(args.map((a) => String(a)).join(' '));
+    };
+  });
+  afterEach(() => {
+    console.log = originalLog;
+  });
+
+  test('allowedInstall logs {phase: "install", host, allowed: true} for an allowed host', async () => {
+    const req = new Request('https://crates.io/api/v1/crates/ripgrep');
+    // The actual handler calls real fetch() for allowed hosts; stub it so
+    // the test doesn't egress.
+    const originalFetch = globalThis.fetch;
+    globalThis.fetch = (async () => new Response('ok', { status: 200 })) as unknown as typeof fetch;
+    try {
+      await handlers.allowedInstall(req, {} as never, {
+        containerId: 'test',
+        className: 'Sandbox',
+        params: { allowedHostnames: ['crates.io'] },
+      });
+    } finally {
+      globalThis.fetch = originalFetch;
+    }
+    expect(captured).toHaveLength(1);
+    const log = JSON.parse(captured[0]);
+    expect(log).toEqual({ phase: 'install', host: 'crates.io', allowed: true });
+  });
+
+  test('allowedInstall logs allowed:false and returns 403 for a non-allowed host', async () => {
+    const req = new Request('https://evil.example.com/payload');
+    const resp = await handlers.allowedInstall(req, {} as never, {
+      containerId: 'test',
+      className: 'Sandbox',
+      params: { allowedHostnames: ['crates.io'] },
+    });
+    expect(resp.status).toBe(403);
+    expect(captured).toHaveLength(1);
+    const log = JSON.parse(captured[0]);
+    expect(log).toEqual({ phase: 'install', host: 'evil.example.com', allowed: false });
+  });
+
+  test('noHttp logs {phase: "noHttp", host, blocked: true} and returns 403 unconditionally', async () => {
+    const req = new Request('https://crates.io/api/v1/crates/ripgrep');
+    const resp = await handlers.noHttp(req, {} as never, { containerId: 'test', className: 'Sandbox' });
+    expect(resp.status).toBe(403);
+    expect(captured).toHaveLength(1);
+    const log = JSON.parse(captured[0]);
+    expect(log).toEqual({ phase: 'noHttp', host: 'crates.io', blocked: true });
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Happy path + install-table coverage
+// ---------------------------------------------------------------------------
+
+describe('sandbox-exec.score() — happy path', () => {
+  test('cargo-binstall ripgrep → returns scorecard + anc_version', async () => {
+    const { stub } = makeStub();
+    const result = await score(stub, CARGO_SPEC);
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    expect(result.value.anc_version).toBe('0.3.1');
+    expect(result.value.scorecard).toMatchObject({ tool: { name: 'ripgrep' } });
+  });
+
+  test('audit_profile from registry passes through as --audit-profile flag', async () => {
+    const { stub, calls } = makeStub();
+    await score(stub, { ...CARGO_SPEC, audit_profile: 'cli-tool' } as InstallSpec & { audit_profile: string });
+    const ancCheck = calls.find((c) => c.kind === 'exec' && c.command.startsWith('anc check '));
+    expect(ancCheck).toBeDefined();
+    expect((ancCheck as Extract<Call, { kind: 'exec' }>).command).toContain("--audit-profile 'cli-tool'");
+  });
+
+  test('no audit_profile → anc check invoked WITHOUT --audit-profile flag', async () => {
+    const { stub, calls } = makeStub();
+    await score(stub, CARGO_SPEC);
+    const ancCheck = calls.find((c) => c.kind === 'exec' && c.command.startsWith('anc check '));
+    expect(ancCheck).toBeDefined();
+    expect((ancCheck as Extract<Call, { kind: 'exec' }>).command).not.toContain('--audit-profile');
+  });
+});
+
+describe('sandbox-exec.score() — install table per PM', () => {
+  // Each case pins the EXACT command string the orchestration emits.
+  // First-token alignment with an actual binary in the sandbox image is
+  // the load-bearing invariant: `cargo-binstall` (one word, hyphenated)
+  // matches the Dockerfile's standalone binary, NOT `cargo binstall`
+  // (which assumes a rust toolchain we don't ship). Bug A surfaced when
+  // these tests pinned the wrong form; do not relax to a startsWith
+  // match — exact equality keeps the binary-name regression alarm loud.
+
+  // Direct-install command shape (auto-detect + gate-markers — Fix 1/3):
+  // build the expected string from URL + extract verb + preferred binary
+  // so each tested archive extension stays a one-line spec while the
+  // (long) shell pipeline lives in one place.
+  function directExpected(url: string, extract: string, preferred: string): string {
+    const qUrl = `'${url}'`;
+    const qPref = `'${preferred}'`;
+    return (
+      `( set -e; tmp=$(mktemp -d); mkdir "$tmp/x"; ` +
+      `echo 'GATE:download' >&2; ` +
+      `curl -fsSL ${qUrl} -o "$tmp/a" 2>"$tmp/curl_err" || ` +
+      `{ echo "DETAILS:Download failed: $(cat "$tmp/curl_err" | head -c 200)" >&2; exit 10; }; ` +
+      `echo 'GATE:extract' >&2; ` +
+      `${extract} 2>"$tmp/ext_err" || ` +
+      `{ echo "DETAILS:Extract failed: $(cat "$tmp/ext_err" | head -c 200)" >&2; exit 12; }; ` +
+      `echo 'GATE:find_binary' >&2; ` +
+      `candidates=$(find "$tmp/x" -type f -perm /111 -printf '%P\\n' 2>/dev/null | ` +
+      `grep -viE '(^|/)(LICEN[CS]E|README|CHANGELOG|NOTICE|AUTHORS|COPYING|MANIFEST|Makefile|\\.gitignore)([._-].*)?$' | ` +
+      `grep -viE '\\.(md|markdown|txt|html|htm|json|yml|yaml|toml|xml|cfg|ini|sh|bat|cmd|py|rb|pl)$' | ` +
+      `grep -vE '(^|/)\\.\\.(/|$)' | ` +
+      `grep -vE '^/' || true); ` +
+      `if [ -z "$candidates" ]; then ` +
+      `all=$(find "$tmp/x" -type f -printf '%P\\n' 2>/dev/null | head -10 | tr '\\n' ' '); ` +
+      `echo "DETAILS:Archive contains no binary named ${preferred}. Files seen: $all" >&2; ` +
+      `exit 11; ` +
+      `fi; ` +
+      `best=$(printf '%s\\n' "$candidates" | awk -v pref=${qPref} '` +
+      `{ ` +
+      `n=split($0, parts, "/"); name=parts[n]; ` +
+      `score=0; ` +
+      `if (name == pref) score=1000; ` +
+      `else if (index(name, pref) > 0) score=500; ` +
+      `if (name !~ /\\./) score+=10; ` +
+      `score -= length(name); ` +
+      `if (score > best_score || best == "") { best_score=score; best=$0 } ` +
+      `} END { print best }'); ` +
+      `detected=$(basename "$best"); ` +
+      `echo 'GATE:install_binary' >&2; ` +
+      `install -m 0755 "$tmp/x/$best" "/usr/local/bin/$detected" 2>"$tmp/inst_err" || ` +
+      `{ echo "DETAILS:Install staging failed: $(cat "$tmp/inst_err" | head -c 200)" >&2; exit 13; }; ` +
+      `rm -rf "$tmp"; ` +
+      `echo "DETECTED_BINARY=$detected" )`
+    );
+  }
+
+  const cases: Array<{ spec: InstallSpec; expected: string }> = [
+    {
+      spec: { pm: 'cargo-binstall', package: 'ripgrep', binary: 'rg' },
+      // --install-path /usr/local/bin forces binary onto PATH (Bug L).
+      expected: "cargo-binstall --no-confirm --no-symlinks --install-path /usr/local/bin 'ripgrep'",
+    },
+    {
+      spec: { pm: 'pip', package: 'black', binary: 'black' },
+      // PIP_NO_COLOR=1: ANSI suppression in pip output (Bug D).
+      // --break-system-packages: overrides Debian PEP 668 refusal (no-op
+      // on the python:3.12-slim-trixie base, retained for safety).
+      // --no-binary=pyperclip,pycparser: selective sdist allowlist for
+      // known sdist-only transitive deps in the agent-tool ecosystem.
+      // pyperclip (Aider #4105) and pycparser (cffi dep, pyperclip #288)
+      // are both pure-Python with mature upstreams. The list lives in
+      // src/worker/score/sdist-allowlist.ts; if entries change, this
+      // expectation must move with it.
+      // PIP_UPLOADED_PRIOR_TO=$(date -u -d '7 days ago' ...): supply-chain
+      // release-delay gate. Date computed at exec time so image age
+      // doesn't widen the gate. pip v26.0+ honors the env var; older
+      // pip ignores it harmlessly.
+      // PIP_DISABLE_PIP_VERSION_CHECK=1: suppresses pip's "A new release
+      // of pip is available" stderr notice so the scorecard evidence +
+      // bounce-panel details stay clean. Also baked as image ENV in
+      // docker/sandbox/Dockerfile; the inline pass keeps the
+      // currently-deployed image quiet until the next rebuild lands.
+      expected:
+        "PIP_UPLOADED_PRIOR_TO=$(date -u -d '7 days ago' +%Y-%m-%dT%H:%M:%SZ) " +
+        'PIP_DISABLE_PIP_VERSION_CHECK=1 ' +
+        'PIP_NO_COLOR=1 pip install --only-binary=:all: --no-binary=pyperclip,pycparser --no-cache-dir ' +
+        "--break-system-packages 'black'",
+    },
+    {
+      spec: { pm: 'uv', package: 'black', binary: 'black' },
+      // Native uv path (split from pm=pip). uv's resolver sidesteps
+      // pip 24+'s PEP 658 metadata fast-path (Bug M).
+      expected: "uv tool install 'black'",
+    },
+    {
+      spec: { pm: 'bun', package: 'prettier', binary: 'prettier' },
+      // Native bun runtime (post-rework). --ignore-scripts matches the
+      // npm path's lifecycle-script suppression.
+      expected: "bun add -g --ignore-scripts 'prettier'",
+    },
+    {
+      spec: { pm: 'npm', package: 'typescript', binary: 'tsc' },
+      expected: "npm install -g --ignore-scripts 'typescript'",
+    },
+    {
+      spec: { pm: 'direct', url: 'https://example.com/foo.tar.gz', binary: 'foo' },
+      // direct install: extract to tmp, auto-detect binary (Fix 1) by
+      // listing executables + filtering docs + scoring against the
+      // preferred name. Installs the chosen file under its OWN basename
+      // and echoes DETECTED_BINARY=<name> so runScore overrides
+      // spec.binary before the `which` gate runs. Each pipeline step
+      // emits a GATE:<name> marker (Fix 3) for failure classification.
+      expected: directExpected('https://example.com/foo.tar.gz', `tar xzf "$tmp/a" -C "$tmp/x"`, 'foo'),
+    },
+    {
+      spec: { pm: 'direct', url: 'https://example.com/foo.tgz', binary: 'foo' },
+      expected: directExpected('https://example.com/foo.tgz', `tar xzf "$tmp/a" -C "$tmp/x"`, 'foo'),
+    },
+    {
+      // Bug N: many newer Rust tools (csvlens) ship .tar.xz only,
+      // often with the binary nested in a `<tool>-<arch>/` directory.
+      spec: { pm: 'direct', url: 'https://example.com/foo.tar.xz', binary: 'foo' },
+      expected: directExpected('https://example.com/foo.tar.xz', `tar xJf "$tmp/a" -C "$tmp/x"`, 'foo'),
+    },
+    {
+      spec: { pm: 'direct', url: 'https://example.com/foo.tar.bz2', binary: 'foo' },
+      expected: directExpected('https://example.com/foo.tar.bz2', `tar xjf "$tmp/a" -C "$tmp/x"`, 'foo'),
+    },
+    {
+      // .txz alias for .tar.xz — same xJ flag.
+      spec: { pm: 'direct', url: 'https://example.com/foo.txz', binary: 'foo' },
+      expected: directExpected('https://example.com/foo.txz', `tar xJf "$tmp/a" -C "$tmp/x"`, 'foo'),
+    },
+    {
+      // .tbz2 alias for .tar.bz2 — same xj flag.
+      spec: { pm: 'direct', url: 'https://example.com/foo.tbz2', binary: 'foo' },
+      expected: directExpected('https://example.com/foo.tbz2', `tar xjf "$tmp/a" -C "$tmp/x"`, 'foo'),
+    },
+    {
+      // .zip — unzip into the tmp dir, then auto-detect + install.
+      // Mirrors how GitHub Windows-style release artifacts (and the
+      // occasional Linux tool, e.g. bun) get extracted. The post-extract
+      // find walk is recursive, so an archive that expands to
+      // `extracted-dir/bin/<tool>` resolves identically to a flat archive.
+      spec: { pm: 'direct', url: 'https://example.com/foo.zip', binary: 'foo' },
+      expected: directExpected('https://example.com/foo.zip', `unzip -q "$tmp/a" -d "$tmp/x"`, 'foo'),
+    },
+    {
+      // Unknown / no recognized extension: falls back to `tar xz` so
+      // legacy tar.gz-without-extension URLs keep working. Fails loud
+      // if the archive isn't actually gzip-tar, which surfaces as a
+      // chain_resolved_install_failed bounce with the curl/tar stderr
+      // visible to the user.
+      spec: { pm: 'direct', url: 'https://example.com/foo-release', binary: 'foo' },
+      expected: directExpected('https://example.com/foo-release', `tar xzf "$tmp/a" -C "$tmp/x"`, 'foo'),
+    },
+    {
+      // Binary in subfolder coverage: the find walk under "$tmp/x" has
+      // no -maxdepth, so an archive whose binary lives at
+      // `<arch-dir>/bin/<binary>` (or any nesting depth) resolves
+      // identically. The command-shape assertion above pins the
+      // recursive walk; this case pins it explicitly so a future
+      // refactor that adds `-maxdepth 1` breaks here loudly. URL is a
+      // .tar.gz with a binary name that implies a release-folder layout.
+      spec: { pm: 'direct', url: 'https://example.com/nested-binary.tar.gz', binary: 'nested-binary' },
+      expected: directExpected(
+        'https://example.com/nested-binary.tar.gz',
+        `tar xzf "$tmp/a" -C "$tmp/x"`,
+        'nested-binary',
+      ),
+    },
+  ];
+
+  for (const { spec, expected } of cases) {
+    test(`pm=${spec.pm} → '${expected}'`, async () => {
+      const { stub, calls } = makeStub();
+      await score(stub, spec);
+      const installCmds = calls.filter(
+        (c) =>
+          c.kind === 'exec' &&
+          !c.command.startsWith('which ') &&
+          c.command !== 'anc --version' &&
+          !c.command.startsWith('anc check '),
+      ) as Array<Extract<Call, { kind: 'exec' }>>;
+      expect(installCmds).toHaveLength(1);
+      expect(installCmds[0].command).toBe(expected);
+    });
+  }
+
+  test('install table first binaries match binaries present in the sandbox Dockerfile', async () => {
+    // Systemic catch: every install command in the table MUST invoke a
+    // binary name that actually exists on the sandbox container PATH.
+    // The set below mirrors docker/sandbox/Dockerfile's apt install line
+    // + the standalone tarballs. Keep in sync with the Dockerfile.
+    //
+    // Tokens that look like `NAME=value` are env-var prefixes (e.g.
+    // `PIP_NO_COLOR=1`, `PIP_UPLOADED_PRIOR_TO=$(date ...)` — the latter
+    // spans multiple whitespace tokens because of the $(...) command
+    // substitution). The matcher skips env-var prefixes (and the inner
+    // tokens of any command substitution they contain) until it finds
+    // the actual binary name.
+    const knownBinaries = new Set([
+      'cargo-binstall', // tarball at /usr/local/bin/
+      'pip', // provided by the python:3.12-slim-trixie base
+      'uv', // tarball at /usr/local/bin/
+      'npm', // npm apt
+      'bun', // tarball at /usr/local/bin/
+      'curl', // curl apt
+      '(', // direct install wraps the pipeline in a `( set -e; … )` subshell
+    ]);
+
+    // Find the first token that is NOT an env-var assignment (NAME=...)
+    // AND is not inside a $(...) command substitution. Handles
+    // multi-word `$(...)` interiors by skipping until the closing `)`.
+    function firstBinary(cmd: string): string {
+      const tokens = cmd.split(/\s+/);
+      let inCommandSub = false;
+      for (const t of tokens) {
+        if (inCommandSub) {
+          if (t.includes(')')) inCommandSub = false;
+          continue;
+        }
+        if (t.includes('$(') && !t.includes(')')) {
+          inCommandSub = true;
+          continue;
+        }
+        if (/^[A-Z_][A-Z0-9_]*=/.test(t)) continue; // env-var prefix
+        return t;
+      }
+      return tokens[0];
+    }
+
+    for (const { spec, expected } of cases) {
+      const binary = firstBinary(expected);
+      expect(
+        knownBinaries.has(binary),
+        `pm=${spec.pm} install command first binary "${binary}" not in known binaries; ` +
+          `update docker/sandbox/Dockerfile or sandbox-exec.ts install table`,
+      ).toBe(true);
+    }
+  });
+});
+
+describe('sandbox-exec.score() — bounce classes', () => {
+  test('brew passed to score() bounces install_unsupported (resolveSpec should translate first)', async () => {
+    // Direct invocation of score() with pm=brew is a contract violation
+    // — resolveSpec in do.ts is supposed to run the discovery-fallback
+    // before this layer is reached. Keeping the bounce guards against a
+    // future caller that skips resolveSpec.
+    const { stub } = makeStub();
+    const result = await score(stub, { pm: 'brew', package: 'bat', binary: 'bat' });
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('install_unsupported');
+    expect(result.details).toContain('brew');
+  });
+
+  test('go passed to score() bounces install_unsupported (resolveSpec should translate first)', async () => {
+    // Parallel to the brew bounce: `go install` would compile from
+    // source, violating the binary-only premise. resolveSpec's
+    // resolveGoFallback in do.ts redirects github.com/<owner>/<repo>
+    // module paths through the discovery chain so a GitHub release
+    // binary substitutes for the compile. Direct invocation of
+    // score() with pm=go is a contract violation and bounces.
+    const { stub } = makeStub();
+    const result = await score(stub, { pm: 'go', package: 'github.com/charmbracelet/glow', binary: 'glow' });
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('install_unsupported');
+    expect(result.details).toContain('go');
+  });
+
+  test('install command non-zero → chain_resolved_install_failed', async () => {
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.startsWith('cargo-binstall ')) {
+        return { success: false, stdout: '', stderr: 'no binary asset for x86_64-musl', exitCode: 1 };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, CARGO_SPEC);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('chain_resolved_install_failed');
+    expect(result.details).toContain('no binary asset');
+  });
+
+  test('install stderr with ANSI color codes → details field strips them (Bug D)', async () => {
+    // pip emits CSI escape sequences in progress output. The truncate()
+    // helper strips them before returning so the user-facing details
+    // field is plain text — no `\x1b[31m` artifacts visible in the API
+    // response or downstream CLI / browser renderings.
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.includes('pip install ')) {
+        return {
+          success: false,
+          stdout: '',
+          stderr: '\x1b[31mERROR\x1b[0m: Could not find a version that satisfies the requirement nonexistent',
+          exitCode: 1,
+        };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, { pm: 'pip', package: 'nonexistent', binary: 'nonexistent' });
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('chain_resolved_install_failed');
+    expect(result.details).toBe('ERROR: Could not find a version that satisfies the requirement nonexistent');
+    expect(result.details).not.toContain('\x1b');
+  });
+
+  test('which check misses → chain_resolved_no_binary_produced (pallets/click case)', async () => {
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.startsWith('which ')) {
+        return { success: false, stdout: '', stderr: '', exitCode: 1 };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, { pm: 'pip', package: 'click', binary: 'click' });
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('chain_resolved_no_binary_produced');
+    expect(result.details).toBe('binary=click');
+  });
+
+  test('anc --version returns non-zero → anc_version_unreadable', async () => {
+    const responder: ExecResponder = (cmd) => {
+      if (cmd === 'anc --version') {
+        return { success: false, stdout: '', stderr: 'segfault', exitCode: 139 };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, CARGO_SPEC);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('anc_version_unreadable');
+  });
+
+  test('anc --version stdout unparseable → anc_version_unreadable with details', async () => {
+    const responder: ExecResponder = (cmd) => {
+      if (cmd === 'anc --version') return { success: true, stdout: 'garbage', stderr: '' };
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, CARGO_SPEC);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('anc_version_unreadable');
+    expect(result.details).toBe('garbage');
+  });
+
+  test('anc check returns non-JSON → anc_check_failed', async () => {
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.startsWith('anc check ')) {
+        return { success: true, stdout: 'definitely not json', stderr: '' };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, CARGO_SPEC);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('anc_check_failed');
+  });
+
+  test('anc check non-zero but valid JSON envelope → still returns scorecard', async () => {
+    // anc emits structured envelopes on stderr-exit when checks produce
+    // findings; the orchestration treats a parseable envelope as the
+    // authoritative response regardless of exit code.
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.startsWith('anc check ')) {
+        return { success: false, stdout: ANC_CHECK_OK, stderr: '', exitCode: 1 };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, CARGO_SPEC);
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    expect(result.value.scorecard).toMatchObject({ tool: { name: 'ripgrep' } });
+  });
+});
+
+describe('sandbox-exec.score() — shell injection safety', () => {
+  test('single-quote in package name is shell-escaped', async () => {
+    const { stub, calls } = makeStub();
+    await score(stub, { pm: 'npm', package: "foo'; rm -rf /;'", binary: 'foo' });
+    const installCmd = calls.find((c) => c.kind === 'exec' && c.command.startsWith('npm install ')) as
+      | Extract<Call, { kind: 'exec' }>
+      | undefined;
+    expect(installCmd).toBeDefined();
+    // POSIX escape: foo'\''; rm -rf /;'\''   wrapped in '...'
+    expect(installCmd?.command).toContain("'foo'\\''; rm -rf /;'\\'''");
+  });
+});
+
+describe('sandbox-exec.score() — supply-chain release-delay gate (pip exec-time)', () => {
+  // The pip install command must carry a PIP_UPLOADED_PRIOR_TO env-var
+  // prefix that refuses to install packages published less than 7 days
+  // ago. Date computed at exec time via shell substitution so a long-
+  // running image doesn't widen the gate. pip v26.0+ honors the env
+  // var; older pip versions ignore it (no-op). The companion uv gate
+  // (UV_EXCLUDE_NEWER) is asserted at the image layer in
+  // tests/dockerfile-sandbox.test.ts.
+
+  test('pip install command prepends PIP_UPLOADED_PRIOR_TO with a 7-day shell-computed date', async () => {
+    const { stub, calls } = makeStub();
+    await score(stub, { pm: 'pip', package: 'black', binary: 'black' });
+    const installCmd = calls.find((c) => c.kind === 'exec' && c.command.includes(' pip install ')) as
+      | Extract<Call, { kind: 'exec' }>
+      | undefined;
+    expect(installCmd).toBeDefined();
+    // The env-var prefix MUST be present and MUST use shell command
+    // substitution so the date refreshes on every exec rather than
+    // being frozen at image-build time.
+    expect(installCmd?.command).toMatch(/^PIP_UPLOADED_PRIOR_TO=\$\(date -u -d '7 days ago' \+%Y-%m-%dT%H:%M:%SZ\) /);
+  });
+
+  test('PIP_UPLOADED_PRIOR_TO precedes PIP_NO_COLOR and the pip binary in the command string', async () => {
+    // Token order matters because env-var prefixes only apply to the
+    // command they precede. Putting the date AFTER `pip` would invoke
+    // pip without the gate and then set an unused shell variable.
+    const { stub, calls } = makeStub();
+    await score(stub, { pm: 'pip', package: 'httpie', binary: 'http' });
+    const installCmd = calls.find((c) => c.kind === 'exec' && c.command.includes(' pip install ')) as
+      | Extract<Call, { kind: 'exec' }>
+      | undefined;
+    expect(installCmd).toBeDefined();
+    const cmd = installCmd?.command ?? '';
+    const priorIdx = cmd.indexOf('PIP_UPLOADED_PRIOR_TO=');
+    const colorIdx = cmd.indexOf('PIP_NO_COLOR=');
+    const pipIdx = cmd.indexOf(' pip install ');
+    expect(priorIdx).toBe(0);
+    expect(colorIdx).toBeGreaterThan(priorIdx);
+    expect(pipIdx).toBeGreaterThan(colorIdx);
+  });
+
+  test('shell substitution uses GNU date syntax compatible with the debian-trixie base', async () => {
+    // `date -u -d '<relative>' +<format>` is GNU-date syntax; BSD date
+    // would need `-v-7d`. The python:3.12-slim-trixie base ships GNU
+    // coreutils, so the -d form is correct. Pin the syntax so a future
+    // base swap to a non-GNU coreutils image surfaces here.
+    const { stub, calls } = makeStub();
+    await score(stub, { pm: 'pip', package: 'pylint', binary: 'pylint' });
+    const installCmd = calls.find((c) => c.kind === 'exec' && c.command.includes(' pip install ')) as
+      | Extract<Call, { kind: 'exec' }>
+      | undefined;
+    expect(installCmd).toBeDefined();
+    // GNU form: -d '<relative>'.  BSD form (rejected): -v-7d.
+    expect(installCmd?.command).toContain("date -u -d '7 days ago'");
+    expect(installCmd?.command).not.toContain('date -u -v-7d');
+  });
+
+  test('pip install command carries PIP_DISABLE_PIP_VERSION_CHECK=1 to suppress upgrade notice', async () => {
+    // The Dockerfile bakes this as an image ENV so future builds are
+    // quiet at the OS level, but the currently-deployed image predates
+    // that change. Prepending the env var inline at exec time gives
+    // the currently-deployed sandbox the suppression immediately,
+    // without a rebuild. The companion image-level test lives in
+    // tests/dockerfile-sandbox.test.ts.
+    const { stub, calls } = makeStub();
+    await score(stub, { pm: 'pip', package: 'black', binary: 'black' });
+    const installCmd = calls.find((c) => c.kind === 'exec' && c.command.includes(' pip install ')) as
+      | Extract<Call, { kind: 'exec' }>
+      | undefined;
+    expect(installCmd).toBeDefined();
+    expect(installCmd?.command).toContain('PIP_DISABLE_PIP_VERSION_CHECK=1');
+    // Ordering: must precede `pip install` so it applies to the pip
+    // invocation rather than being set as an unused shell variable
+    // afterward.
+    const cmd = installCmd?.command ?? '';
+    expect(cmd.indexOf('PIP_DISABLE_PIP_VERSION_CHECK=1')).toBeLessThan(cmd.indexOf(' pip install '));
+  });
+
+  test('non-pip install paths do NOT carry PIP_UPLOADED_PRIOR_TO (env-var is pip-scoped only)', async () => {
+    // Leaking the pip env-var into npm/bun/cargo/uv installs would be
+    // dead weight at best and could mask a missing real implementation
+    // for those PMs. uv's gate is set via image ENV (UV_EXCLUDE_NEWER),
+    // not via this prefix.
+    const npmStub = makeStub();
+    await score(npmStub.stub, { pm: 'npm', package: 'cowsay', binary: 'cowsay' });
+    const npmCmd = npmStub.calls.find((c) => c.kind === 'exec' && c.command.includes('npm install')) as
+      | Extract<Call, { kind: 'exec' }>
+      | undefined;
+    expect(npmCmd?.command).not.toContain('PIP_UPLOADED_PRIOR_TO');
+
+    const uvStub = makeStub();
+    await score(uvStub.stub, { pm: 'uv', package: 'black', binary: 'black' });
+    const uvCmd = uvStub.calls.find((c) => c.kind === 'exec' && c.command.includes('uv tool install')) as
+      | Extract<Call, { kind: 'exec' }>
+      | undefined;
+    expect(uvCmd?.command).not.toContain('PIP_UPLOADED_PRIOR_TO');
+  });
+
+  // The remaining tests in this group execute the shell substitution
+  // directly (via Bun.spawn) and assert the runtime output, not just
+  // the literal string baked into the install command. The static
+  // tests above pin "the right string is in the command"; these
+  // dynamic tests pin "the string actually produces what pip wants".
+  // Together they catch a future change that swaps `+%Y-%m-%dT%H:%M:%SZ`
+  // for something pip 26+ wouldn't accept.
+
+  test('shell substitution produces an ISO 8601 string pip will accept', async () => {
+    // The exact shell substitution embedded in sandbox-exec.ts.
+    const proc = Bun.spawn(['bash', '-c', "date -u -d '7 days ago' +%Y-%m-%dT%H:%M:%SZ"], {
+      stdout: 'pipe',
+      stderr: 'pipe',
+    });
+    const out = (await new Response(proc.stdout).text()).trim();
+    await proc.exited;
+    // ISO 8601 with second-precision and Zulu suffix — the shape pip
+    // 26+ accepts for PIP_UPLOADED_PRIOR_TO. A future change that
+    // emits a different format (e.g. local timezone, fractional
+    // seconds, or +HH:MM offset) will fail this match before reaching
+    // the sandbox.
+    expect(out).toMatch(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/);
+  });
+
+  test('shell substitution computes a timestamp ~7 days in the past', async () => {
+    // Sanity: the substitution must actually produce a 7-days-ago
+    // timestamp, not "now" or some other arithmetic. Tolerance is
+    // ±10 minutes to absorb test-runtime clock drift and DST quirks.
+    const proc = Bun.spawn(['bash', '-c', "date -u -d '7 days ago' +%Y-%m-%dT%H:%M:%SZ"], {
+      stdout: 'pipe',
+      stderr: 'pipe',
+    });
+    const out = (await new Response(proc.stdout).text()).trim();
+    await proc.exited;
+    const producedMs = Date.parse(out);
+    expect(Number.isFinite(producedMs)).toBe(true);
+    const expectedMs = Date.now() - 7 * 24 * 60 * 60 * 1000;
+    const driftMinutes = Math.abs(producedMs - expectedMs) / 60_000;
+    expect(driftMinutes).toBeLessThan(10);
+  });
+
+  test('shell substitution is the EXACT form embedded in the install command (no drift)', async () => {
+    // Defends against a partial refactor that updates the install
+    // command in sandbox-exec.ts but forgets to update the shell
+    // executed at runtime (or vice versa). Re-extract the substitution
+    // from the live install command and run it; assert it succeeds.
+    const { stub, calls } = makeStub();
+    await score(stub, { pm: 'pip', package: 'mypy', binary: 'mypy' });
+    const installCmd = calls.find((c) => c.kind === 'exec' && c.command.includes(' pip install ')) as
+      | Extract<Call, { kind: 'exec' }>
+      | undefined;
+    expect(installCmd).toBeDefined();
+    // Extract the `$(...)` substitution from the install command.
+    const match = installCmd?.command.match(/^PIP_UPLOADED_PRIOR_TO=\$\(([^)]+)\)/);
+    expect(match).not.toBeNull();
+    const substitution = match?.[1];
+    expect(substitution).toBeTruthy();
+    if (!substitution) return;
+    // Run it.
+    const proc = Bun.spawn(['bash', '-c', substitution], { stdout: 'pipe', stderr: 'pipe' });
+    const out = (await new Response(proc.stdout).text()).trim();
+    const exitCode = await proc.exited;
+    expect(exitCode).toBe(0);
+    expect(out).toMatch(/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Fix 1 — auto-detect archive binary
+// Fix 3 — gate-capture in directInstallCommand
+// ---------------------------------------------------------------------------
+
+describe('sandbox-exec.score() — direct-install auto-detect (Fix 1)', () => {
+  // The install command is what the orchestration tells the container to
+  // run; the container then echoes `DETECTED_BINARY=<name>` to stdout
+  // and runScore() overrides spec.binary before the `which` gate. These
+  // tests stub the container, return a canned auto-detect result, and
+  // assert the `which` + `anc check` calls reference the detected name.
+
+  test('archive auto-detect picks gog when repo=gogcli (the gogcli/openclaw fix)', async () => {
+    // gogcli/openclaw case: GitHub Releases ships a `gog` binary, but
+    // the repo is `gogcli`. Pre-fix Step 2 hardcoded binary=ctx.repo,
+    // and the post-extract `find -name gogcli` missed. Now the install
+    // command does its own listing + scoring + emits the chosen name.
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.startsWith('(') && cmd.includes('GATE:download')) {
+        // Auto-detect picked `gog` from the archive.
+        return { success: true, stdout: 'DETECTED_BINARY=gog\n', stderr: '' };
+      }
+      if (cmd.startsWith('which ')) {
+        return { success: true, stdout: '/usr/local/bin/gog\n', stderr: '' };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub, calls } = makeStub(responder);
+    const result = await score(stub, {
+      pm: 'direct',
+      url: 'https://example.com/gog-linux-amd64.tar.gz',
+      binary: 'gogcli', // the repo name, NOT the actual binary
+    });
+    expect(result.ok).toBe(true);
+    // The which gate must run against the DETECTED name, not the
+    // pre-fix repo name. This is the load-bearing assertion for Fix 1.
+    const whichCall = calls.find((c) => c.kind === 'exec' && c.command.startsWith('which ')) as
+      | Extract<Call, { kind: 'exec' }>
+      | undefined;
+    expect(whichCall?.command).toBe("which 'gog'");
+    const ancCall = calls.find((c) => c.kind === 'exec' && c.command.startsWith('anc check ')) as
+      | Extract<Call, { kind: 'exec' }>
+      | undefined;
+    expect(ancCall?.command).toContain("--command 'gog'");
+  });
+
+  test('no DETECTED_BINARY in stdout → spec.binary stays the preferred name', async () => {
+    // Backward-compat: any non-direct PM install command, or a future
+    // direct-install variant that doesn't emit the marker, keeps the
+    // existing spec.binary value. (npm / pip / cargo-binstall already
+    // print package-manager-specific noise without the marker.)
+    const { stub, calls } = makeStub();
+    const result = await score(stub, CARGO_SPEC);
+    expect(result.ok).toBe(true);
+    const whichCall = calls.find((c) => c.kind === 'exec' && c.command.startsWith('which ')) as
+      | Extract<Call, { kind: 'exec' }>
+      | undefined;
+    expect(whichCall?.command).toBe("which 'rg'");
+  });
+
+  test('DETECTED_BINARY with shell-meta in name → rejected, spec.binary unchanged', async () => {
+    // Defense in depth: the install command's own filter rejects
+    // path-traversal candidates upstream. The extractDetectedBinary
+    // parser whitelists [A-Za-z0-9._-] so any smuggled bytes (e.g.
+    // `gog; rm -rf /`) don't reach the shell-quoted `anc check
+    // --command <binary>` slot.
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.startsWith('(') && cmd.includes('GATE:download')) {
+        return { success: true, stdout: 'DETECTED_BINARY=gog; rm -rf /\n', stderr: '' };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub, calls } = makeStub(responder);
+    await score(stub, {
+      pm: 'direct',
+      url: 'https://example.com/x.tar.gz',
+      binary: 'safe',
+    });
+    const whichCall = calls.find((c) => c.kind === 'exec' && c.command.startsWith('which ')) as
+      | Extract<Call, { kind: 'exec' }>
+      | undefined;
+    // Stays 'safe' (the original spec.binary) because the malicious
+    // detected name failed the [A-Za-z0-9._-] whitelist.
+    expect(whichCall?.command).toBe("which 'safe'");
+  });
+});
+
+describe('sandbox-exec.score() — gate-capture in install details (Fix 3)', () => {
+  // Each direct-install pipeline step emits a GATE:<name> marker to
+  // stderr BEFORE running, and on failure also emits a step-specific
+  // DETAILS:<text> line. extractGateDetails() in sandbox-exec.ts picks
+  // up the LAST GATE marker and the DETAILS line; runScore threads
+  // them into the user-facing details field instead of the raw stderr.
+
+  test('curl-fail surfaces "Download failed:" in details', async () => {
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.startsWith('(') && cmd.includes('GATE:download')) {
+        return {
+          success: false,
+          stdout: '',
+          stderr: 'GATE:download\nDETAILS:Download failed: curl: (22) The requested URL returned error: 404\n',
+          exitCode: 10,
+        };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, {
+      pm: 'direct',
+      url: 'https://example.com/missing.tar.gz',
+      binary: 'missing',
+    });
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('chain_resolved_install_failed');
+    expect(result.details).toMatch(/^Download failed:/);
+  });
+
+  test('extract-fail surfaces "Extract failed:" in details', async () => {
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.startsWith('(') && cmd.includes('GATE:download')) {
+        return {
+          success: false,
+          stdout: '',
+          stderr: 'GATE:download\nGATE:extract\nDETAILS:Extract failed: gzip: stdin: not in gzip format\n',
+          exitCode: 12,
+        };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, {
+      pm: 'direct',
+      url: 'https://example.com/notarchive.tar.gz',
+      binary: 'foo',
+    });
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('chain_resolved_install_failed');
+    expect(result.details).toMatch(/^Extract failed:/);
+  });
+
+  test('no-binary-candidates → chain_resolved_no_binary_produced + lists archive contents', async () => {
+    // Red-team case: an archive that ships only docs. The auto-detect
+    // filter strips every entry, the pipeline exits 11, and the
+    // orchestration re-classifies as no_binary_produced (it's an
+    // "archive shipped no executable" miss, not an "install failed"
+    // miss).
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.startsWith('(') && cmd.includes('GATE:download')) {
+        return {
+          success: false,
+          stdout: '',
+          stderr:
+            'GATE:download\nGATE:extract\nGATE:find_binary\nDETAILS:Archive contains no binary named foo. Files seen: LICENSE README.md\n',
+          exitCode: 11,
+        };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, {
+      pm: 'direct',
+      url: 'https://example.com/docs-only.tar.gz',
+      binary: 'foo',
+    });
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('chain_resolved_no_binary_produced');
+    expect(result.details).toContain('Archive contains no binary named foo');
+    expect(result.details).toContain('LICENSE');
+  });
+
+  test('install-staging-fail surfaces "Install staging failed:" in details', async () => {
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.startsWith('(') && cmd.includes('GATE:download')) {
+        return {
+          success: false,
+          stdout: '',
+          stderr:
+            'GATE:download\nGATE:extract\nGATE:find_binary\nGATE:install_binary\nDETAILS:Install staging failed: install: cannot stat file\n',
+          exitCode: 13,
+        };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, {
+      pm: 'direct',
+      url: 'https://example.com/x.tar.gz',
+      binary: 'foo',
+    });
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('chain_resolved_install_failed');
+    expect(result.details).toMatch(/^Install staging failed:/);
+  });
+
+  test('no GATE markers in stderr → falls back to raw truncated stderr (back-compat)', async () => {
+    // Non-direct PMs (npm, pip, cargo-binstall) don't emit GATE
+    // markers. Existing behavior — surface the raw stderr — must be
+    // preserved so we don't regress error messages for the registry-
+    // install paths.
+    const responder: ExecResponder = (cmd) => {
+      if (cmd.startsWith('cargo-binstall ')) {
+        return { success: false, stdout: '', stderr: 'plain stderr without markers', exitCode: 1 };
+      }
+      return defaultResponder(cmd);
+    };
+    const { stub } = makeStub(responder);
+    const result = await score(stub, CARGO_SPEC);
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toBe('chain_resolved_install_failed');
+    expect(result.details).toBe('plain stderr without markers');
+  });
+
+  test('extractGateDetails + extractDetectedBinary export shape', async () => {
+    // Sanity-check the parser exports so client-side users can rely on
+    // them. Keeps the module surface stable.
+    const m = await import('../src/worker/score/sandbox-exec');
+    expect(typeof m.extractDetectedBinary).toBe('function');
+    expect(typeof m.extractGateDetails).toBe('function');
+    expect(m.extractDetectedBinary('foo\nDETECTED_BINARY=gog\n')).toBe('gog');
+    expect(m.extractDetectedBinary('no marker')).toBeNull();
+    expect(m.extractGateDetails('GATE:download\nDETAILS:Download failed: 404')?.kind).toBe('download');
+    expect(
+      m.extractGateDetails('GATE:find_binary\nDETAILS:Archive contains no binary named x. Files seen:')?.kind,
+    ).toBe('no_binary_candidates');
+    expect(m.extractGateDetails('')).toBeNull();
+  });
+});
diff --git a/tests/score-handler-branch-and-norelease.test.ts b/tests/score-handler-branch-and-norelease.test.ts
new file mode 100644
index 0000000..4c2fe99
--- /dev/null
+++ b/tests/score-handler-branch-and-norelease.test.ts
@@ -0,0 +1,1629 @@
+// /api/score regression tests for two real-world input classes that hit
+// the live path in distinct ways and need to stay regression-proof:
+//
+//   1. owner/repo shorthand pointing at a repo with NO install path and
+//      NO releases (e.g. brettdavies/dotfiles). Validator routes to
+//      github-url. Registry + hints miss. Cache tier skipped (no
+//      derivable binary). Live DO runs and returns chain_no_resolve
+//      because nothing on the discovery chain produced a binary. The
+//      handler must bounce 404 with no share_url AND preserve the
+//      response triad (spec_version + checker_url; anc_version is
+//      success-only).
+//
+//   2. github-url with an explicit branch (`/tree/<branch>`). Per
+//      b295e3b: branch-scoped inputs ALWAYS skip the curated + cache
+//      tiers and go straight to live scoring. The cache write after
+//      the live run is also skipped (do.ts) because caching under the
+//      bare binary name would clobber the default-branch scorecard.
+//      Two contract checks: branch URL on an uncurated repo runs live;
+//      branch URL on a CURATED repo also runs live (curated cross-check
+//      is skipped when branch is set).
+//
+// All tests mock at the DO boundary using the same Sandbox['fetch']
+// stub shape score-handler.test.ts uses, so any future Sandbox class
+// drift (renamed fetch, changed signature) is a TypeScript error here.
+
+import { afterAll, beforeEach, describe, expect, test } from 'bun:test';
+import type { Sandbox } from '../src/worker/score/do';
+import { _resetAccessibilityCache } from '../src/worker/score/github-accessibility';
+import { _resetIndexCache, handleScore, type ScoreEnv } from '../src/worker/score/handler';
+import { _resetKillSwitchCache } from '../src/worker/score/kill-switch';
+import { validateInput } from '../src/worker/score/validate';
+
+// Snapshot globalThis.fetch BEFORE the first makeEnv() override so afterAll
+// can restore it. Bun runs tests in a single process; if this file leaves
+// the global fetch pointing at our compositeFetcher, subsequent test
+// files (score-do.test.ts uses bare `fetch()` in allowedInstall handlers)
+// get the wrong dispatcher and surface as `unexpected fetch` errors.
+const ORIGINAL_FETCH = globalThis.fetch;
+
+// ---------------------------------------------------------------------------
+// Fixtures
+// ---------------------------------------------------------------------------
+
+// Mirrors score-handler.test.ts: ripgrep is curated under by_slug AND
+// by_owner_repo so the branch-vs-curated contract test can prove that
+// an explicit branch URL on a curated repo STILL goes live.
+const REGISTRY_INDEX = {
+  by_slug: {
+    ripgrep: {
+      name: 'ripgrep',
+      binary: 'rg',
+      install: 'brew install ripgrep',
+      repo: 'BurntSushi/ripgrep',
+      version: '15.1.0',
+      anc_version: '0.3.0',
+      scorecard_url: '/score/ripgrep',
+    },
+  },
+  by_owner_repo: {
+    'BurntSushi/ripgrep': {
+      name: 'ripgrep',
+      binary: 'rg',
+      install: 'brew install ripgrep',
+      repo: 'BurntSushi/ripgrep',
+      version: '15.1.0',
+      anc_version: '0.3.0',
+      scorecard_url: '/score/ripgrep',
+    },
+  },
+};
+
+// Deliberately empty hints index — brettdavies/dotfiles and orf/gping
+// have no hint, so the github-url tier skips cache (no binary derivable
+// upfront) and falls through to the live DO path.
+const HINTS_INDEX = {
+  by_owner_repo: {},
+};
+
+type CallTracker = { doCalls: number; lastBody?: unknown };
+// Global fetch tracker captures both the Turnstile siteverify and the
+// github HEAD pre-check. Tests assert that the HEAD probe was issued for
+// the expected owner/repo (or skipped, when a hint exists / branch is set).
+type GithubHeadResponse = { kind: 'status'; status: number } | { kind: 'throw'; error: unknown };
+
+type CacheTracker = { gets: string[]; puts: string[] };
+
+type StubOverrides = Partial<{
+  doResponse: unknown;
+  doStatus: number;
+  tracker: CallTracker;
+  cacheContent: Record<string, unknown>;
+  cacheTracker: CacheTracker;
+  // Keys are `<owner>/<repo>` lowercased. The pre-check uses lowercase
+  // owner+repo in its in-isolate cache key; we mirror that here so a
+  // case-mismatched paste still finds the mock.
+  githubHeadResponses: Record<string, GithubHeadResponse>;
+  githubFetchTracker: { calls: string[] };
+  // Post-2026-05-20 discovery-move: the Worker now runs the discovery
+  // fan-out (api.github.com releases, crates.io, npm, pypi, proxy.golang,
+  // README parse). Tests that previously relied on a DO-side discovery
+  // mock now have to seed the Worker's discovery fetch path. Two knobs:
+  //
+  //   - releaseAssets: when set for an `<owner>/<repo>` key, the Step 2
+  //     release lookup returns the named browser_download_url so
+  //     discovery resolves to `pm: 'direct'` with that URL. Empty/missing
+  //     → 404 → Step 2 misses. Steps 3 (crates/npm/pypi/go) and 4
+  //     (README parse) miss unconditionally in this mock.
+  //
+  // The compositeFetcher returns 404 for every discovery URL unless an
+  // override matches — which means without seeding, every github-url
+  // input bounces as chain_no_resolve at the Worker tier. That matches
+  // the post-move bounce flow we're testing.
+  releaseAssets: Record<string, { name: string; url: string }>;
+}>;
+
+function makeEnv(overrides: StubOverrides = {}): ScoreEnv {
+  const doResponse = overrides.doResponse ?? { error: 'sandbox_stub_until_u6' };
+  const doStatus = overrides.doStatus ?? 200;
+
+  const tracker = overrides.tracker;
+  // Sandbox['fetch'] typing: any future signature change becomes a
+  // compile error here, mirroring the pattern documented in
+  // score-handler.test.ts's file header.
+  const stubFetch: Sandbox['fetch'] = async (req) => {
+    if (tracker) {
+      tracker.doCalls += 1;
+      try {
+        tracker.lastBody = await req.clone().json();
+      } catch {
+        tracker.lastBody = null;
+      }
+    }
+    return new Response(JSON.stringify(doResponse), {
+      status: doStatus,
+      headers: { 'content-type': 'application/json' },
+    });
+  };
+  const stubDo = {
+    idFromName(_name: string) {
+      return { id: 'stub' };
+    },
+    get(_id: unknown) {
+      return { fetch: stubFetch };
+    },
+  };
+
+  // globalThis.fetch dispatch: Turnstile siteverify and the github HEAD
+  // pre-check both read globalThis.fetch in production. We dispatch on URL
+  // so a single override covers both. Default for github.com is "accessible"
+  // (200) — tests that need a private/nonexistent repo pass an explicit
+  // `githubHeadResponses` entry. Default for Turnstile is success — these
+  // tests don't exercise the bot-defense gate.
+  const githubHeadResponses = overrides.githubHeadResponses ?? {};
+  const githubFetchTracker = overrides.githubFetchTracker;
+  const releaseAssets = overrides.releaseAssets ?? {};
+  const compositeFetcher = async (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
+    const url = typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url;
+    if (url.startsWith('https://challenges.cloudflare.com/turnstile/v0/siteverify')) {
+      return new Response(JSON.stringify({ success: true }), {
+        status: 200,
+        headers: { 'content-type': 'application/json' },
+      });
+    }
+    if (url.startsWith('https://github.com/')) {
+      const ownerRepo = url.slice('https://github.com/'.length).toLowerCase();
+      if (githubFetchTracker) githubFetchTracker.calls.push(ownerRepo);
+      const mock = githubHeadResponses[ownerRepo];
+      if (mock?.kind === 'throw') throw mock.error;
+      // Default behavior: HEAD returns 200 (repo accessible). Tests that
+      // need a 404 or 5xx pass an explicit mock entry above.
+      const status = mock?.kind === 'status' ? mock.status : 200;
+      // Sanity guard against accidentally letting a method other than HEAD
+      // sneak through: the real handler ONLY issues HEAD here, and if a
+      // future regression switched to GET, this test would surface it.
+      expect(init?.method).toBe('HEAD');
+      return new Response(null, { status, headers: { 'content-type': 'text/html' } });
+    }
+    // Discovery URLs (post 2026-05-20 discovery-move: the Worker fans
+    // these out, not the DO). Pattern-match in order of likelihood and
+    // return 404 by default so no-resolve flows are the default test
+    // shape; tests that need a release-asset hit seed `releaseAssets`.
+    const releaseMatch = url.match(/^https:\/\/api\.github\.com\/repos\/([^/]+)\/([^/]+)\/releases\/latest$/);
+    if (releaseMatch) {
+      const key = `${releaseMatch[1].toLowerCase()}/${releaseMatch[2].toLowerCase()}`;
+      const asset = releaseAssets[key];
+      if (asset) {
+        return new Response(JSON.stringify({ assets: [{ name: asset.name, browser_download_url: asset.url }] }), {
+          status: 200,
+          headers: { 'content-type': 'application/json' },
+        });
+      }
+      return new Response('', { status: 404 });
+    }
+    if (
+      url.startsWith('https://formulae.brew.sh/') ||
+      url.startsWith('https://crates.io/') ||
+      url.startsWith('https://registry.npmjs.org/') ||
+      url.startsWith('https://pypi.org/') ||
+      url.startsWith('https://proxy.golang.org/') ||
+      url.startsWith('https://raw.githubusercontent.com/')
+    ) {
+      return new Response('', { status: 404 });
+    }
+    // Anything else (unexpected) — surface the URL so a stray fetch is
+    // visible in test output rather than silently returning success.
+    throw new Error(`unexpected fetch in test: ${url}`);
+  };
+  (globalThis as { fetch: typeof fetch }).fetch = compositeFetcher as unknown as typeof fetch;
+
+  const cacheStore = new Map<string, string>();
+  for (const [k, v] of Object.entries(overrides.cacheContent ?? {})) {
+    cacheStore.set(k, typeof v === 'string' ? v : JSON.stringify(v));
+  }
+  const cacheTracker = overrides.cacheTracker;
+  const cacheStub = {
+    async get(key: string) {
+      if (cacheTracker) cacheTracker.gets.push(key);
+      const raw = cacheStore.get(key);
+      if (raw === undefined) return null;
+      return {
+        async json() {
+          return JSON.parse(raw);
+        },
+        async text() {
+          return raw;
+        },
+      };
+    },
+    async put(key: string, value: unknown) {
+      if (cacheTracker) cacheTracker.puts.push(key);
+      cacheStore.set(key, typeof value === 'string' ? value : String(value));
+    },
+    async delete(key: string) {
+      cacheStore.delete(key);
+    },
+  };
+
+  return {
+    ASSETS: {
+      async fetch(req: Request | string): Promise<Response> {
+        const url = typeof req === 'string' ? req : req.url;
+        const path = new URL(url).pathname;
+        if (path === '/registry-index.json') {
+          return new Response(JSON.stringify(REGISTRY_INDEX), { status: 200 });
+        }
+        if (path === '/discovery-hints-index.json') {
+          return new Response(JSON.stringify(HINTS_INDEX), { status: 200 });
+        }
+        return new Response('not found', { status: 404 });
+      },
+    } as Fetcher,
+    SCORE: stubDo as unknown as DurableObjectNamespace,
+    SCORE_KV: {
+      async get() {
+        return null;
+      },
+    } as unknown as KVNamespace,
+    SCORE_CACHE: cacheStub as unknown as R2Bucket,
+    SCORE_LIMITER: {
+      async limit() {
+        return { success: true };
+      },
+    },
+    SCORE_LIMITER_IP: {
+      async limit() {
+        return { success: true };
+      },
+    },
+    SCORE_TELEMETRY: { writeDataPoint() {} },
+    TURNSTILE_SECRET: 'test-turnstile-secret',
+    SESSION_HMAC_SECRET: 'test-hmac-secret-please',
+  };
+}
+
+function postScore(input: string): Request {
+  return new Request('https://anc.dev/api/score', {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ input, turnstile_token: 'tok' }),
+  });
+}
+
+beforeEach(() => {
+  _resetIndexCache();
+  _resetKillSwitchCache();
+  _resetAccessibilityCache();
+});
+
+// Restore the original globalThis.fetch so this file doesn't poison
+// subsequent test files (score-do.test.ts in particular uses bare fetch()
+// inside allowedInstall handlers and depends on the unmocked global).
+afterAll(() => {
+  globalThis.fetch = ORIGINAL_FETCH;
+});
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+describe('/api/score — branch URLs + no-release repos', () => {
+  // -------------------------------------------------------------------------
+  // 1. owner/repo shorthand on a repo that has no install path and no
+  //    releases. The live probe against brettdavies/dotfiles returns
+  //    chain_no_resolve with no details. The handler maps that to 404.
+  // -------------------------------------------------------------------------
+
+  test('owner/repo shorthand validates to github-url with no branch', () => {
+    // Pure validator test: `brettdavies/dotfiles` (raw string, no
+    // https:// prefix) routes through SHORTHAND_RE → github-url with
+    // owner+repo populated and branch undefined.
+    const result = validateInput('brettdavies/dotfiles', REGISTRY_INDEX);
+    expect(result).toEqual({
+      kind: 'github-url',
+      owner: 'brettdavies',
+      repo: 'dotfiles',
+    });
+  });
+
+  test('owner/repo shorthand for no-release repo → 404 chain_no_resolve at Worker, DO never dispatched', async () => {
+    // Post-2026-05-20 discovery-move: the Worker (not the DO) runs the
+    // discovery fan-out. brettdavies/dotfiles ships no releases + no
+    // crates/npm/pypi/go alternative + no parseable README install
+    // block, so the Worker's resolveSpec returns chain_no_resolve and
+    // the DO is never dispatched. The compositeFetcher returns 404 for
+    // every discovery URL by default, modelling exactly this case.
+    //
+    // Response triad must still be present on the error envelope. share_url
+    // is absent because no binary was derivable.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({ tracker });
+    const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+    expect(res.status).toBe(404);
+    const body = (await res.json()) as {
+      error: { code: string };
+      spec_version: string;
+      checker_url: string;
+      share_url?: string;
+    };
+    expect(body.error.code).toBe('chain_no_resolve');
+    expect(body.spec_version).toBeTruthy();
+    expect(body.checker_url).toBeTruthy();
+    expect(body.share_url).toBeUndefined();
+    // The Worker bounced before the DO — no compute billed, no metered-
+    // gate budget burned.
+    expect(tracker.doCalls).toBe(0);
+  });
+
+  test('owner/repo shorthand for no-binary repo → 502 chain_resolved_no_binary_produced, response triad preserved', async () => {
+    // Discovery resolves (Step 2 release asset hit), so the DO is
+    // dispatched with the InstallSpec. The DO mock returns the
+    // "install ran but no binary appeared on PATH" error — different
+    // failure class from chain_no_resolve, different status (502 vs
+    // 404), but the same response triad guarantee + no share_url.
+    const env = makeEnv({
+      releaseAssets: {
+        'brettdavies/dotfiles': {
+          name: 'dotfiles-linux-x86_64.tar.gz',
+          url: 'https://example.com/dotfiles-linux-x86_64.tar.gz',
+        },
+      },
+      doResponse: {
+        error: 'chain_resolved_no_binary_produced',
+        details: 'install ran but no binary appeared on PATH',
+      },
+    });
+    const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+    expect(res.status).toBe(502);
+    const body = (await res.json()) as {
+      error: { code: string; details?: string };
+      spec_version: string;
+      checker_url: string;
+      share_url?: string;
+    };
+    expect(body.error.code).toBe('chain_resolved_no_binary_produced');
+    expect(body.error.details).toContain('install ran but no binary');
+    expect(body.spec_version).toBeTruthy();
+    expect(body.checker_url).toBeTruthy();
+    expect(body.share_url).toBeUndefined();
+  });
+
+  // -------------------------------------------------------------------------
+  // 2. Branch URL on an UNCURATED repo. Live path runs (validates to
+  //    github-url with branch set; no hint to derive a cache binary; no
+  //    curated entry). Mock DO returns success; assert the response
+  //    carries the live scorecard and NO share_url (branch-scoped).
+  // -------------------------------------------------------------------------
+
+  test('branch URL on uncurated repo validates to github-url with branch set', () => {
+    const result = validateInput('https://github.com/orf/gping/tree/master', REGISTRY_INDEX);
+    expect(result).toEqual({
+      kind: 'github-url',
+      owner: 'orf',
+      repo: 'gping',
+      branch: 'master',
+    });
+  });
+
+  test('branch URL on uncurated repo → live DO dispatched, NO share_url on response', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      tracker,
+      doResponse: {
+        scorecard: {
+          tool: { name: 'gping', binary: 'gping', version: null },
+          badge: { score_pct: 50, eligible: false },
+          score: { value: 50 },
+        },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/orf/gping/tree/master'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+
+    // Verify the DO received a resolved InstallSpec (post-2026-05-20
+    // discovery-move shape: `{spec, hash}` instead of `{input, hash}`).
+    // Branch URLs route to a git-clone spec that threads owner/repo/
+    // branch directly through to the clone command in sandbox-exec.ts.
+    const sent = tracker.lastBody as
+      | { spec?: { pm?: string; owner?: string; repo?: string; branch?: string } }
+      | undefined;
+    expect(sent?.spec?.pm).toBe('git-clone');
+    expect(sent?.spec?.owner).toBe('orf');
+    expect(sent?.spec?.repo).toBe('gping');
+    expect(sent?.spec?.branch).toBe('master');
+
+    const body = (await res.json()) as {
+      scorecard: { kind?: string; tool: { name: string } };
+      share_url?: string;
+      anc_version: string;
+      spec_version: string;
+      checker_url: string;
+    };
+    // NOT registry_hit — branch-scoped inputs never wear the curated kind.
+    expect(body.scorecard.kind).toBeUndefined();
+    expect(body.scorecard.tool.name).toBe('gping');
+    // Branch-scoped inputs never get a share URL (per deriveShareBinary).
+    expect(body.share_url).toBeUndefined();
+    // Response triad on success.
+    expect(body.spec_version).toBeTruthy();
+    expect(body.checker_url).toBeTruthy();
+    expect(body.anc_version).toBe('0.3.1');
+  });
+
+  // -------------------------------------------------------------------------
+  // 3. Branch URL on a CURATED repo. This is the contract test that pins
+  //    "explicit branch ALWAYS goes live, even for curated repos." If a
+  //    future change accidentally re-enables the curated cross-check for
+  //    branch URLs, this test fails loudly.
+  // -------------------------------------------------------------------------
+
+  test('branch URL on curated repo → curated cross-check SKIPPED, live DO dispatched', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      tracker,
+      doResponse: {
+        scorecard: {
+          tool: { name: 'ripgrep', binary: 'rg', version: '15.1.0' },
+          badge: { score_pct: 88, eligible: true },
+          score: { value: 88 },
+        },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/BurntSushi/ripgrep/tree/master'), env);
+    expect(res.status).toBe(200);
+    // The whole point: curated registry has BurntSushi/ripgrep, but the
+    // branch URL still went live.
+    expect(tracker.doCalls).toBe(1);
+
+    const body = (await res.json()) as {
+      scorecard: { kind?: string; scorecard_url?: string; tool: { name: string }; score?: { value: number } };
+      share_url?: string;
+      anc_version: string;
+    };
+    // NOT registry_hit — even though by_owner_repo['BurntSushi/ripgrep']
+    // exists, the branch flag forced the live path.
+    expect(body.scorecard.kind).toBeUndefined();
+    expect(body.scorecard.scorecard_url).toBeUndefined();
+    expect(body.scorecard.tool.name).toBe('ripgrep');
+    expect(body.scorecard.score?.value).toBe(88);
+    // No share_url — branch-scoped, even for curated.
+    expect(body.share_url).toBeUndefined();
+    expect(body.anc_version).toBe('0.3.1');
+  });
+
+  test('branch URL on curated repo bypasses R2 cache too (prefilled curated key unreachable)', async () => {
+    // Defense-in-depth on the cache tier: if someone prefills the cache
+    // under the curated binary's key (scores/rg/...), a branch-scoped
+    // request must still go live. This pins the "branch URL skips both
+    // tiers" contract; not just the registry tier.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      tracker,
+      cacheContent: {
+        'scores/rg/0.4.0.json': {
+          spec_version: '0.4.0',
+          anc_version: '0.3.1',
+          tool_version: '15.1.0',
+          scorecard: { tool: { name: 'ripgrep', binary: 'rg', version: '15.1.0' }, score: { value: 99 } },
+        },
+      },
+      doResponse: {
+        scorecard: { tool: { name: 'ripgrep', binary: 'rg', version: '15.1.0' }, score: { value: 77 } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/BurntSushi/ripgrep/tree/master'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+    const body = (await res.json()) as { scorecard: { score?: { value: number } } };
+    // The live DO's score (77), not the prefilled cache's (99).
+    expect(body.scorecard.score?.value).toBe(77);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// /api/score — github accessibility pre-check (private / nonexistent repo)
+//
+// Avoids paying the DO sandbox cold-start cost on a request that cannot
+// resolve a binary regardless: github 404 on the repo root means private,
+// renamed, or never existed. Fast-fail with `github_repo_not_accessible`
+// instead of spinning up the sandbox to discover the same fact.
+//
+// Skip matrix (proceed to DO without probing):
+//   - github-url with explicit branch (DO clones; HEAD on root is silent
+//     about branch existence)
+//   - github-url that matched a curated discovery hint (we already know
+//     the install path; a transient github 404 shouldn't break a curated
+//     repo)
+//   - curated registry hits (never reach the pre-check; the lookupScorecard
+//     tier returns 'curated' before we get here)
+//   - non-github-url inputs (no repo to probe)
+//
+// Fail-OPEN matrix (proceed to DO when github itself misbehaves):
+//   - HEAD returns 5xx
+//   - HEAD throws (timeout, network error)
+// ---------------------------------------------------------------------------
+
+describe('/api/score — github accessibility pre-check', () => {
+  // -------------------------------------------------------------------------
+  // 1. Private / nonexistent repo: HEAD 404 → fast-fail, no DO dispatched.
+  //    This is the user-reported case (brettdavies/solutions is private; the
+  //    sandbox would otherwise burn a cold-start trying to discover a
+  //    binary). The fast-fail status is 404 — same as chain_no_resolve —
+  //    but the error.code differs so the client can render a precise
+  //    "GitHub couldn't find that repo" bounce panel instead of the
+  //    generic "no pre-built binary" copy.
+  // -------------------------------------------------------------------------
+
+  test('private/nonexistent repo → fast-fail with github_repo_not_accessible, no DO dispatched', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      tracker,
+      githubHeadResponses: {
+        'brettdavies/solutions': { kind: 'status', status: 404 },
+      },
+    });
+    const res = await handleScore(postScore('brettdavies/solutions'), env);
+    expect(res.status).toBe(404);
+    expect(tracker.doCalls).toBe(0);
+    const body = (await res.json()) as {
+      error: { code: string };
+      spec_version: string;
+      checker_url: string;
+    };
+    expect(body.error.code).toBe('github_repo_not_accessible');
+    // Response triad on error.
+    expect(body.spec_version).toBeTruthy();
+    expect(body.checker_url).toBeTruthy();
+  });
+
+  // -------------------------------------------------------------------------
+  // 2. Public repo: HEAD 200 → DO dispatched as usual. Mirrors the existing
+  //    brettdavies/dotfiles test but pins the pre-check explicitly so a
+  //    future change that flipped the default (e.g., treating all 2xx as
+  //    not_accessible) would fail loudly here.
+  // -------------------------------------------------------------------------
+
+  test('public repo → HEAD 200, discovery runs (DO dispatched when spec resolves)', async () => {
+    // Public repo with a release asset: HEAD probe passes, Worker
+    // discovery resolves the release artifact, DO dispatched with the
+    // resolved InstallSpec. The chain_no_resolve case (no release asset
+    // → bounce at Worker before DO) is covered in the test block above;
+    // this one proves the pre-check + discovery hand-off works end-to-
+    // end for a real release.
+    const tracker: CallTracker = { doCalls: 0 };
+    const headTracker = { calls: [] as string[] };
+    const env = makeEnv({
+      tracker,
+      githubFetchTracker: headTracker,
+      githubHeadResponses: {
+        'brettdavies/dotfiles': { kind: 'status', status: 200 },
+      },
+      releaseAssets: {
+        'brettdavies/dotfiles': {
+          name: 'dotfiles-linux-x86_64.tar.gz',
+          url: 'https://example.com/dotfiles-linux-x86_64.tar.gz',
+        },
+      },
+      doResponse: {
+        scorecard: { tool: { name: 'dotfiles', binary: 'dotfiles', version: '1.0.0' } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+    expect(headTracker.calls).toContain('brettdavies/dotfiles');
+  });
+
+  // -------------------------------------------------------------------------
+  // 3. Branch URL skips the pre-check. The DO needs to clone regardless,
+  //    and HEAD on the repo root is silent about whether the branch ref
+  //    exists — so the probe wouldn't add information. The skip path also
+  //    avoids a confusing UX where a public-repo + nonexistent-branch
+  //    paste 200s on the pre-check and then errors at the DO with the
+  //    real failure code.
+  // -------------------------------------------------------------------------
+
+  test('explicit branch URL → skips pre-check (DO needs to clone regardless)', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const headTracker = { calls: [] as string[] };
+    const env = makeEnv({
+      tracker,
+      githubFetchTracker: headTracker,
+      // Intentionally NOT providing a githubHeadResponses entry: the
+      // compositeFetcher would throw `unexpected fetch` if the handler
+      // tried to probe github here, and the test would fail loudly.
+      doResponse: {
+        scorecard: { tool: { name: 'gping', binary: 'gping', version: null } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/orf/gping/tree/master'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+    expect(headTracker.calls).toEqual([]);
+  });
+
+  // -------------------------------------------------------------------------
+  // 4. Hint-matched repo skips the pre-check. Curated install metadata
+  //    already exists; a transient github 404 shouldn't break the live
+  //    path for a repo we explicitly know how to install. (We DON'T match
+  //    on score-card existence — that's the curated-registry tier above.)
+  //    The hint case is github-url that matched discovery-hints, not
+  //    by_owner_repo.
+  // -------------------------------------------------------------------------
+
+  test('hint-matched repo → skips pre-check', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const headTracker = { calls: [] as string[] };
+    // Per-test env override: inject a hint for aider-ai/aider so the
+    // registry-lookup tier returns 'hint', which the handler treats as
+    // "skip the HEAD probe" (we already curate install metadata).
+    const baseEnv = makeEnv({
+      tracker,
+      githubFetchTracker: headTracker,
+      doResponse: {
+        scorecard: { tool: { name: 'aider', binary: 'aider', version: '0.50.0' } },
+        anc_version: '0.3.1',
+      },
+    });
+    const envWithHints: ScoreEnv = {
+      ...baseEnv,
+      ASSETS: {
+        async fetch(req: Request | string): Promise<Response> {
+          const url = typeof req === 'string' ? req : req.url;
+          const path = new URL(url).pathname;
+          if (path === '/registry-index.json') {
+            return new Response(JSON.stringify(REGISTRY_INDEX), { status: 200 });
+          }
+          if (path === '/discovery-hints-index.json') {
+            // Aider hint: matches what discovery-hints index ships for
+            // aider-ai/aider in production. The presence of this hint
+            // gates the pre-check skip.
+            return new Response(
+              JSON.stringify({
+                by_owner_repo: {
+                  'Aider-AI/aider': { pm: 'pip', package: 'aider-chat', binary: 'aider' },
+                },
+              }),
+              { status: 200 },
+            );
+          }
+          return new Response('not found', { status: 404 });
+        },
+      } as Fetcher,
+    };
+    const res = await handleScore(postScore('https://github.com/Aider-AI/aider'), envWithHints);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+    // The whole point: no HEAD probe was issued. compositeFetcher would
+    // throw `unexpected fetch` if one had been — but additionally we
+    // assert the tracker for clarity.
+    expect(headTracker.calls).toEqual([]);
+  });
+
+  // -------------------------------------------------------------------------
+  // 5. Curated registry hit never reaches the pre-check. The
+  //    lookupScorecard tier returns 'curated' for slugs / curated
+  //    by_owner_repo entries; the handler short-circuits at step 2 with
+  //    a `registry_hit` envelope and never touches Turnstile, rate-limit,
+  //    HEAD probe, or DO.
+  // -------------------------------------------------------------------------
+
+  test('curated by_owner_repo → registry-fast-path wins, no HEAD probe', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const headTracker = { calls: [] as string[] };
+    const env = makeEnv({
+      tracker,
+      githubFetchTracker: headTracker,
+    });
+    const res = await handleScore(postScore('https://github.com/BurntSushi/ripgrep'), env);
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as { scorecard: { kind?: string; scorecard_url?: string } };
+    expect(body.scorecard.kind).toBe('registry_hit');
+    expect(body.scorecard.scorecard_url).toBe('/score/ripgrep');
+    // Registry hit unmetered: no Turnstile, no DO, no HEAD probe.
+    expect(tracker.doCalls).toBe(0);
+    expect(headTracker.calls).toEqual([]);
+  });
+
+  // -------------------------------------------------------------------------
+  // 6. Fail-OPEN: github HEAD 5xx → proceed to the discovery step. A
+  //    transient github outage must not silently break scoring; the
+  //    Worker's resolveSpec runs discovery and either resolves (DO
+  //    dispatched) or bounces chain_no_resolve. The point is that the
+  //    pre-check did NOT fast-fail with github_repo_not_accessible —
+  //    the github outage was not allowed to mask a real result.
+  //    Same contract for HEAD throwing (timeout, network).
+  // -------------------------------------------------------------------------
+
+  test('github HEAD 5xx → fail-open through accessibility, Worker discovery runs', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      tracker,
+      githubHeadResponses: {
+        'brettdavies/dotfiles': { kind: 'status', status: 503 },
+      },
+      // No release asset seeded → discovery bounces chain_no_resolve at
+      // the Worker. The KEY assertion is that the accessibility 5xx
+      // didn't short-circuit to github_repo_not_accessible — the
+      // discovery step ran AND its own (also-no-resolve) verdict was
+      // surfaced.
+    });
+    const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+    expect(res.status).toBe(404);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('chain_no_resolve');
+    // The Worker did the discovery work itself; DO was never reached
+    // (post-2026-05-20 discovery-move: chain_no_resolve bounces at the
+    // Worker tier).
+    expect(tracker.doCalls).toBe(0);
+  });
+
+  test('github HEAD throws (network timeout) → fail-open through accessibility, Worker discovery runs', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      tracker,
+      githubHeadResponses: {
+        // The probe's AbortController uses DOMException('AbortError'); we
+        // surface that shape so the accessibility module sees a real
+        // timeout and tags reason='timeout'. The handler's fail-open
+        // path doesn't actually branch on the reason — any 'unknown'
+        // proceeds — but we throw the realistic shape for honesty.
+        'brettdavies/dotfiles': { kind: 'throw', error: new DOMException('aborted', 'AbortError') },
+      },
+    });
+    const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+    expect(res.status).toBe(404);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('chain_no_resolve');
+    expect(tracker.doCalls).toBe(0);
+  });
+
+  // -------------------------------------------------------------------------
+  // 7. Red-team: slug validation must happen BEFORE the URL is built.
+  //    validate.ts already enforces this at the Worker boundary, so a
+  //    bad slug never reaches the handler; this test pins the module's
+  //    own guard in place against a future caller that bypasses
+  //    validate.ts (e.g., a new internal route). We call the
+  //    accessibility module directly because the integration path is
+  //    sealed.
+  // -------------------------------------------------------------------------
+
+  test('accessibility module refuses invalid slug without issuing fetch (defense-in-depth)', async () => {
+    const { checkGithubAccessibility } = await import('../src/worker/score/github-accessibility');
+    let fetchCalls = 0;
+    const sentinelFetcher = (async () => {
+      fetchCalls += 1;
+      return new Response(null, { status: 200 });
+    }) as unknown as typeof fetch;
+    // Each input is something validate.ts already rejects (path traversal,
+    // spaces, semicolons). The module's OWNER_RE / REPO_RE refuse them
+    // independently so a regression in validate.ts doesn't open a
+    // probe-URL injection.
+    const bad: Array<[string, string]> = [
+      ['../etc', 'passwd'],
+      ['foo bar', 'baz'],
+      ['ok-owner', 'evil; rm -rf'],
+      ['-leading-hyphen', 'ok'],
+      ['', 'ok'],
+      ['ok', ''],
+    ];
+    for (const [owner, repo] of bad) {
+      const result = await checkGithubAccessibility(owner, repo, { fetcher: sentinelFetcher });
+      expect(result.state).toBe('unknown');
+      if (result.state === 'unknown') {
+        expect(result.reason).toBe('invalid_slug');
+      }
+    }
+    expect(fetchCalls).toBe(0);
+  });
+
+  // -------------------------------------------------------------------------
+  // 8. Red-team: HEAD must not follow redirects to non-github hosts. A
+  //    hypothetical github 30x to evil.com is benign in production
+  //    (github doesn't do that), but the manual-redirect mode makes the
+  //    safety property structural. We pin the behavior so a future
+  //    refactor that switched to `redirect: 'follow'` would fail here.
+  // -------------------------------------------------------------------------
+
+  test('accessibility module treats 30x as accessible without dereferencing Location', async () => {
+    const { checkGithubAccessibility, _resetAccessibilityCache: resetCache } = await import(
+      '../src/worker/score/github-accessibility'
+    );
+    resetCache();
+    let calls = 0;
+    const fetcher = (async (_url: RequestInfo | URL, init?: RequestInit) => {
+      calls += 1;
+      // Redirect mode MUST be 'manual' — otherwise a follow could pivot
+      // off-host. This assertion fires if a future refactor relaxes it.
+      expect(init?.redirect).toBe('manual');
+      return new Response(null, {
+        status: 301,
+        headers: { Location: 'https://evil.com/owner/repo' },
+      });
+    }) as unknown as typeof fetch;
+    const result = await checkGithubAccessibility('Renamed-Owner', 'renamed-repo', { fetcher });
+    expect(result.state).toBe('accessible');
+    // Exactly one fetch — no follow.
+    expect(calls).toBe(1);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// /api/score — Worker-side discovery (post 2026-05-20 discovery-move)
+//
+// The discovery chain + brew/go fallbacks moved from the DO into the
+// Worker. The tests below pin the new behavior at the integration
+// layer: chain_no_resolve bounces happen at the Worker before any DO
+// dispatch, before any metered gate runs, and the DO request body is
+// `{spec, hash}` rather than the pre-move `{input, hash}`.
+//
+// The roster fixtures (brettdavies/dotfiles, openclaw/gogcli, orf/gping)
+// come from the brettdavies/* test-fixture roster
+// (~/.claude/projects/-home-brett-dev-agentnative-site/memory/
+// reference_test_fixture_repos.md) — real repos so the shapes match
+// production traffic, mocked at the fetch boundary so the tests run
+// offline.
+// ---------------------------------------------------------------------------
+
+describe('/api/score — Worker-side discovery (post 2026-05-20 move)', () => {
+  test('chain_no_resolve at the Worker tier — DO never dispatched (foo/bar fixture)', async () => {
+    // No releaseAssets seeded; brettdavies/dotfiles (no install path,
+    // no releases, no crates/npm/pypi/go peer) is the canonical
+    // chain_no_resolve fixture from the roster.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({ tracker });
+    const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+    expect(res.status).toBe(404);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('chain_no_resolve');
+    expect(tracker.doCalls).toBe(0);
+  });
+
+  test('chain_no_resolve still burns metered gates (kill-switch + Turnstile + rate-limit run BEFORE discovery)', async () => {
+    // Post 2026-05-20 gates-before-discovery reorder: the metered gates
+    // sit AHEAD of resolveSpec, so a no-resolve paste pays the gate
+    // toll before reaching the discovery fan-out. This pins the new
+    // ordering — a regression that moved discovery back ahead of the
+    // gates would let an unauthenticated caller fire the ~5-call
+    // discovery chain at zero rate-limit cost.
+    //
+    // Three sub-cases, one per gate, each with a no-resolve fixture
+    // (brettdavies/dotfiles — no install path, no release). Asserting
+    // separately so the failing gate is identifiable from the test
+    // name, not buried inside a single status check.
+
+    // 4a. Kill switch flipped → 503 scoring_disabled (not chain_no_resolve).
+    {
+      _resetKillSwitchCache();
+      const tracker: CallTracker = { doCalls: 0 };
+      const githubFetchTracker = { calls: [] as string[] };
+      const env = makeEnv({ tracker, githubFetchTracker });
+      env.SCORE_KV = {
+        async get(key: string) {
+          if (key === 'scoring_disabled') return 'true';
+          return null;
+        },
+      } as unknown as KVNamespace;
+      const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+      expect(res.status).toBe(503);
+      const body = (await res.json()) as { error: { code: string } };
+      expect(body.error.code).toBe('scoring_disabled');
+      // Kill-switch denied before any outbound — no HEAD probe, no
+      // discovery fetches, no DO dispatch.
+      expect(githubFetchTracker.calls).toEqual([]);
+      expect(tracker.doCalls).toBe(0);
+    }
+
+    // 4b. Turnstile denial → 400 turnstile_failed (not chain_no_resolve).
+    //     Build a sibling makeEnv but override globalThis.fetch's
+    //     siteverify response to refuse. The default compositeFetcher
+    //     replies success on every siteverify; we hand-wrap it here.
+    {
+      _resetKillSwitchCache();
+      const tracker: CallTracker = { doCalls: 0 };
+      const githubFetchTracker = { calls: [] as string[] };
+      const env = makeEnv({ tracker, githubFetchTracker });
+      const originalFetch = globalThis.fetch;
+      const denyTurnstileFetch = (async (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
+        const url = typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url;
+        if (url.startsWith('https://challenges.cloudflare.com/turnstile/v0/siteverify')) {
+          return new Response(JSON.stringify({ success: false }), {
+            status: 200,
+            headers: { 'content-type': 'application/json' },
+          });
+        }
+        return originalFetch(input, init);
+      }) as unknown as typeof fetch;
+      (globalThis as { fetch: typeof fetch }).fetch = denyTurnstileFetch;
+      try {
+        const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+        expect(res.status).toBe(400);
+        const body = (await res.json()) as { error: { code: string } };
+        expect(body.error.code).toBe('turnstile_failed');
+        // Turnstile denied before discovery — no HEAD probe, no
+        // discovery fetches, no DO dispatch.
+        expect(githubFetchTracker.calls).toEqual([]);
+        expect(tracker.doCalls).toBe(0);
+      } finally {
+        (globalThis as { fetch: typeof fetch }).fetch = originalFetch;
+      }
+    }
+
+    // 4c. Rate-limit denial → 429 rate_limited (not chain_no_resolve).
+    {
+      _resetKillSwitchCache();
+      const tracker: CallTracker = { doCalls: 0 };
+      const githubFetchTracker = { calls: [] as string[] };
+      const env = makeEnv({ tracker, githubFetchTracker });
+      env.SCORE_LIMITER = {
+        async limit() {
+          return { success: false };
+        },
+      };
+      const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+      expect(res.status).toBe(429);
+      const body = (await res.json()) as { error: { code: string } };
+      expect(body.error.code).toBe('rate_limited');
+      // Limiter denied before discovery — no HEAD probe, no
+      // discovery fetches, no DO dispatch.
+      expect(githubFetchTracker.calls).toEqual([]);
+      expect(tracker.doCalls).toBe(0);
+    }
+
+    // 4c'. Per-IP fallback denial → 429 rate_limited (same shape).
+    {
+      _resetKillSwitchCache();
+      const tracker: CallTracker = { doCalls: 0 };
+      const githubFetchTracker = { calls: [] as string[] };
+      const env = makeEnv({ tracker, githubFetchTracker });
+      env.SCORE_LIMITER_IP = {
+        async limit() {
+          return { success: false };
+        },
+      };
+      const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+      expect(res.status).toBe(429);
+      const body = (await res.json()) as { error: { code: string } };
+      expect(body.error.code).toBe('rate_limited');
+      expect(githubFetchTracker.calls).toEqual([]);
+      expect(tracker.doCalls).toBe(0);
+    }
+  });
+
+  test('curated registry hit STILL bypasses gates (R6 unmetered contract preserved)', async () => {
+    // R6: curated registry hits are unmetered. Even with every gate
+    // hard-failing, a `ripgrep` POST must return registry_hit 200 from
+    // the read-only registry tier (step 2 in handler.ts). If a future
+    // refactor moved the gates ahead of lookupScorecard, this test
+    // would surface as a 503 / 400 / 429.
+    const tracker: CallTracker = { doCalls: 0 };
+    const githubFetchTracker = { calls: [] as string[] };
+    const env = makeEnv({ tracker, githubFetchTracker });
+    env.SCORE_KV = {
+      async get(key: string) {
+        if (key === 'scoring_disabled') return 'true';
+        return null;
+      },
+    } as unknown as KVNamespace;
+    env.SCORE_LIMITER = {
+      async limit() {
+        return { success: false };
+      },
+    };
+    env.SCORE_LIMITER_IP = {
+      async limit() {
+        return { success: false };
+      },
+    };
+    const originalFetch = globalThis.fetch;
+    const denyTurnstileFetch = (async (input: RequestInfo | URL): Promise<Response> => {
+      const url = typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url;
+      if (url.startsWith('https://challenges.cloudflare.com/turnstile/v0/siteverify')) {
+        return new Response(JSON.stringify({ success: false }), {
+          status: 200,
+          headers: { 'content-type': 'application/json' },
+        });
+      }
+      throw new Error(`unexpected fetch in curated-bypass test: ${url}`);
+    }) as unknown as typeof fetch;
+    (globalThis as { fetch: typeof fetch }).fetch = denyTurnstileFetch;
+    try {
+      const res = await handleScore(postScore('ripgrep'), env);
+      expect(res.status).toBe(200);
+      const body = (await res.json()) as { scorecard: { kind: string; scorecard_url: string } };
+      expect(body.scorecard.kind).toBe('registry_hit');
+      expect(body.scorecard.scorecard_url).toBe('/score/ripgrep');
+      // No outbound — registry hit served from in-memory index.
+      expect(githubFetchTracker.calls).toEqual([]);
+      expect(tracker.doCalls).toBe(0);
+    } finally {
+      (globalThis as { fetch: typeof fetch }).fetch = originalFetch;
+    }
+  });
+
+  test('R2 cache hit STILL bypasses gates (R6 extended to cached scorecards)', async () => {
+    // R6 extended: a cached scorecard is functionally identical to a
+    // curated one — no sandbox cost, no metered budget. With cache
+    // prefilled and every gate denying, the response must still be
+    // 200 from the cache tier.
+    const tracker: CallTracker = { doCalls: 0 };
+    const githubFetchTracker = { calls: [] as string[] };
+    const env = makeEnv({
+      tracker,
+      githubFetchTracker,
+      cacheContent: {
+        'scores/dotfiles/0.4.0.json': {
+          spec_version: '0.4.0',
+          anc_version: '0.3.1',
+          tool_version: '1.0.0',
+          scorecard: {
+            tool: { name: 'dotfiles', binary: 'dotfiles', version: '1.0.0' },
+            score: { value: 88 },
+          },
+        },
+      },
+    });
+    env.SCORE_KV = {
+      async get(key: string) {
+        if (key === 'scoring_disabled') return 'true';
+        return null;
+      },
+    } as unknown as KVNamespace;
+    env.SCORE_LIMITER = {
+      async limit() {
+        return { success: false };
+      },
+    };
+    env.SCORE_LIMITER_IP = {
+      async limit() {
+        return { success: false };
+      },
+    };
+    const originalFetch = globalThis.fetch;
+    const denyTurnstileFetch = (async (input: RequestInfo | URL): Promise<Response> => {
+      const url = typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url;
+      if (url.startsWith('https://challenges.cloudflare.com/turnstile/v0/siteverify')) {
+        return new Response(JSON.stringify({ success: false }), {
+          status: 200,
+          headers: { 'content-type': 'application/json' },
+        });
+      }
+      throw new Error(`unexpected fetch in cache-bypass test: ${url}`);
+    }) as unknown as typeof fetch;
+    (globalThis as { fetch: typeof fetch }).fetch = denyTurnstileFetch;
+    try {
+      // brettdavies/dotfiles has no hint, so cache lookup needs to derive
+      // a binary. The Aider-shape hint test in score-handler.test.ts
+      // already exercises that path; here we use the install-command
+      // shape which derives binary='dotfiles' from the slug parser.
+      const res = await handleScore(postScore('npm install -g dotfiles'), env);
+      expect(res.status).toBe(200);
+      const body = (await res.json()) as { scorecard: { tool: { name: string }; score: { value: number } } };
+      expect(body.scorecard.tool.name).toBe('dotfiles');
+      expect(body.scorecard.score.value).toBe(88);
+      // No outbound — cache served the response.
+      expect(githubFetchTracker.calls).toEqual([]);
+      expect(tracker.doCalls).toBe(0);
+    } finally {
+      (globalThis as { fetch: typeof fetch }).fetch = originalFetch;
+    }
+  });
+
+  test('GET on missing slug returns 404 without burning gates', async () => {
+    // GET is the read-only paste-and-share contract. A miss must 404
+    // chain_no_resolve from step 2's read-only tier — never consult
+    // gates, never reach discovery or the DO. This pins the GET-skip
+    // contract: a future refactor that accidentally routed GET through
+    // the gates would burn rate-limit budget on bookmark traffic.
+    const tracker: CallTracker = { doCalls: 0 };
+    const githubFetchTracker = { calls: [] as string[] };
+    const env = makeEnv({ tracker, githubFetchTracker });
+    // Arm the limiter to deny — a GET that reached it would surface here.
+    env.SCORE_LIMITER = {
+      async limit() {
+        throw new Error('limiter must not be called for GET');
+      },
+    };
+    const req = new Request('https://anc.dev/api/score?input=brettdavies%2Fdotfiles', { method: 'GET' });
+    const res = await handleScore(req, env);
+    expect(res.status).toBe(404);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('chain_no_resolve');
+    // No outbound at all — siteverify, HEAD probe, discovery all skipped.
+    expect(githubFetchTracker.calls).toEqual([]);
+    expect(tracker.doCalls).toBe(0);
+  });
+
+  test('github accessibility pre-check fires AFTER gates (Turnstile fail → no HEAD probe)', async () => {
+    // Gate-ordering invariant: the HEAD pre-check is an outbound and
+    // every cost-bearing call lives behind the gates. A Turnstile fail
+    // must short-circuit BEFORE the HEAD probe runs, otherwise we'd be
+    // paying a github.com call on traffic the bot-defense layer just
+    // rejected.
+    const tracker: CallTracker = { doCalls: 0 };
+    const githubFetchTracker = { calls: [] as string[] };
+    const env = makeEnv({ tracker, githubFetchTracker });
+    const originalFetch = globalThis.fetch;
+    const denyTurnstileFetch = (async (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
+      const url = typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url;
+      if (url.startsWith('https://challenges.cloudflare.com/turnstile/v0/siteverify')) {
+        return new Response(JSON.stringify({ success: false }), {
+          status: 200,
+          headers: { 'content-type': 'application/json' },
+        });
+      }
+      // Any github fetch here would prove the HEAD probe fired BEFORE
+      // Turnstile (the bug). Surface as a loud failure rather than a
+      // silent pass-through.
+      return originalFetch(input, init);
+    }) as unknown as typeof fetch;
+    (globalThis as { fetch: typeof fetch }).fetch = denyTurnstileFetch;
+    try {
+      const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+      expect(res.status).toBe(400);
+      const body = (await res.json()) as { error: { code: string } };
+      expect(body.error.code).toBe('turnstile_failed');
+      // The HEAD probe target would be 'brettdavies/dotfiles' if it ran
+      // — assert the tracker stayed empty.
+      expect(githubFetchTracker.calls).toEqual([]);
+      expect(tracker.doCalls).toBe(0);
+    } finally {
+      (globalThis as { fetch: typeof fetch }).fetch = originalFetch;
+    }
+  });
+
+  test('discovery success dispatches DO with resolved InstallSpec ({spec, hash} body)', async () => {
+    // openclaw/gogcli ships a Linux x86_64 .tar.gz release asset; in
+    // production the Worker resolves to pm=direct with the release URL
+    // as the InstallSpec.url. The DO sees that spec — NOT a
+    // ValidatedInput envelope — and the install path follows the
+    // direct-archive flow with auto-detect (Fix 1) to surface the
+    // actual archive binary name.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      tracker,
+      releaseAssets: {
+        'openclaw/gogcli': {
+          name: 'gog_linux_x86_64.tar.gz',
+          url: 'https://example.com/gog_linux_x86_64.tar.gz',
+        },
+      },
+      doResponse: {
+        scorecard: { tool: { name: 'gog', binary: 'gog', version: '0.4.2' } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/openclaw/gogcli'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+
+    // Worker resolved BEFORE dispatch — the DO sees the typed
+    // InstallSpec (`{spec, hash}`), never the raw ValidatedInput.
+    const body = tracker.lastBody as
+      | { spec?: { pm?: string; url?: string; binary?: string }; hash?: string; input?: unknown }
+      | undefined;
+    expect(body?.spec?.pm).toBe('direct');
+    expect(body?.spec?.url).toBe('https://example.com/gog_linux_x86_64.tar.gz');
+    // Default-binary derivation is the repo name; the DO's auto-detect
+    // (sandbox-exec.ts Fix 1) overrides this when the archive carries a
+    // differently-named executable (gogcli → gog at exec time).
+    expect(body?.spec?.binary).toBe('gogcli');
+    // The hash is still on the wire for telemetry alignment.
+    expect(typeof body?.hash).toBe('string');
+    // No `input` field — the pre-move shape was `{input, hash}`; the
+    // current shape is `{spec, hash}`. A test that finds `input` is
+    // an indicator that the refactor was partially reverted.
+    expect(body?.input).toBeUndefined();
+  });
+
+  test('branch URL constructs git-clone InstallSpec at the Worker (no discovery fetches)', async () => {
+    // Branch-scoped pastes bypass discovery entirely: the spec is built
+    // directly from validated owner/repo/branch and shipped to the DO.
+    // The compositeFetcher would throw on any discovery URL fetch (no
+    // releaseAssets seeded, no formula/crate/npm/pypi/go entries), so a
+    // discovery call would surface as `unexpected fetch in test` — the
+    // test passing IS the proof that no discovery ran.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      tracker,
+      doResponse: {
+        scorecard: { tool: { name: 'gping', binary: 'gping', version: null } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/orf/gping/tree/master'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+    const body = tracker.lastBody as
+      | { spec?: { pm?: string; owner?: string; repo?: string; branch?: string } }
+      | undefined;
+    expect(body?.spec?.pm).toBe('git-clone');
+    expect(body?.spec?.owner).toBe('orf');
+    expect(body?.spec?.repo).toBe('gping');
+    expect(body?.spec?.branch).toBe('master');
+  });
+
+  test('brettdavies/dotfiles → chain_no_resolve at Worker, no DO call (roster fixture)', async () => {
+    // The roster's canonical "no install path, no release" repo. With
+    // discovery now in the Worker, this paste must bounce ~200 ms at
+    // the Worker tier rather than spinning up a container. The
+    // compositeFetcher returns 404 for every discovery URL by default
+    // — that's what the production traffic against this real repo
+    // would also see (modulo any future release the user might ship).
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({ tracker });
+    const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+    expect(res.status).toBe(404);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('chain_no_resolve');
+    expect(tracker.doCalls).toBe(0);
+  });
+
+  test('openclaw/gogcli → discovery resolves at Worker, DO sees release-asset InstallSpec', async () => {
+    // Companion to brettdavies/dotfiles: the roster's canonical
+    // "ships a release asset" repo. Worker discovery's Step 2 lands
+    // the release URL, the DO sees `pm=direct` with that URL, and the
+    // archive auto-detect path (Fix 1) sorts out the gogcli → gog
+    // binary-name mismatch at exec time inside the container.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      tracker,
+      releaseAssets: {
+        'openclaw/gogcli': {
+          name: 'gog_linux_x86_64.tar.gz',
+          url: 'https://example.com/gog_linux_x86_64.tar.gz',
+        },
+      },
+      doResponse: {
+        scorecard: { tool: { name: 'gog', binary: 'gog', version: '0.4.2' } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/openclaw/gogcli'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+    const body = tracker.lastBody as { spec?: { pm?: string; url?: string } } | undefined;
+    expect(body?.spec?.pm).toBe('direct');
+    expect(body?.spec?.url).toContain('gog_linux_x86_64.tar.gz');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// /api/score — post-discovery R2 cache (step 6.5)
+//
+// Step 6.5 re-checks the R2 cache AFTER discovery resolves the
+// InstallSpec, using the same key shape as step 2
+// (`scores/<binary>/<SPEC_VERSION>.json`). The motivating case is
+// github-url-without-hint pastes: the step-2 pre-check has no binary
+// to look up, so it misses structurally — but once discovery resolves
+// `spec.binary`, a previous scoring run's cache write becomes
+// addressable. The post-discovery hit serves the cached scorecard
+// instead of paying the DO cold-start.
+//
+// Wire contract: a step-6.5 hit is indistinguishable from a step-2 hit
+// from the response shape (same `freshness: 'cache-hit'`,
+// `Cache-Control: public, max-age=300`). Both bypass the DO.
+//
+// Skip matrix:
+//   - `spec.pm === 'git-clone'` (branch URLs): branch-scoped scores are
+//     never cached, so the read has nothing to consult.
+//   - `?fromCache=false`: operator escape hatch applies uniformly to
+//     both round-1 and round-2.
+//
+// Telemetry: a single structured log line per request, scope
+// `score.tier`, with `tier` + `cache_pre_attempted` +
+// `cache_pre_hit` + `cache_post_attempted` + `cache_post_hit`.
+// ---------------------------------------------------------------------------
+
+describe('/api/score — post-discovery R2 cache (step 6.5)', () => {
+  test('round-1 miss + round-2 hit → returns cached, DO never dispatched, tier=cache_post', async () => {
+    // github-url-without-hint paste: round-1 cache lookup misses
+    // because no binary is derivable from input alone. Discovery
+    // resolves to a release asset with `binary='gogcli'` (the
+    // discover-binary default for a release ZIP's name). Round-2 reads
+    // `scores/gogcli/0.4.0.json` and hits — DO is never dispatched.
+    const tracker: CallTracker = { doCalls: 0 };
+    const cacheTracker: CacheTracker = { gets: [], puts: [] };
+    const env = makeEnv({
+      tracker,
+      cacheTracker,
+      releaseAssets: {
+        'openclaw/gogcli': {
+          name: 'gog_linux_x86_64.tar.gz',
+          url: 'https://example.com/gog_linux_x86_64.tar.gz',
+        },
+      },
+      cacheContent: {
+        'scores/gogcli/0.4.0.json': {
+          spec_version: '0.4.0',
+          anc_version: '0.3.1',
+          tool_version: '0.4.2',
+          scorecard: {
+            tool: { name: 'gog', binary: 'gog', version: '0.4.2' },
+            score: { value: 91 },
+          },
+        },
+      },
+      // If the DO is ever dispatched, this would be the response —
+      // but the test asserts doCalls==0, so this is unreachable.
+      doResponse: {
+        scorecard: { tool: { name: 'should-not-see-this', binary: 'x' } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/openclaw/gogcli'), env);
+    expect(res.status).toBe(200);
+    // The cached scorecard served the response — value 91, not the
+    // DO's would-be value.
+    const body = (await res.json()) as {
+      scorecard: { tool: { name: string }; score: { value: number } };
+      anc_version: string;
+    };
+    expect(body.scorecard.score.value).toBe(91);
+    expect(body.scorecard.tool.name).toBe('gog');
+    expect(body.anc_version).toBe('0.3.1');
+    // DO not dispatched.
+    expect(tracker.doCalls).toBe(0);
+    // Two cache GETs fired: the round-1 attempt inside lookupScorecard
+    // structurally short-circuits (no binary → no I/O), so only the
+    // round-2 read is visible. Verify the key shape exactly.
+    expect(cacheTracker.gets).toContain('scores/gogcli/0.4.0.json');
+  });
+
+  test('round-1 hit → discovery never runs, round-2 never runs (no double-fetch)', async () => {
+    // install-command paste with a binary derivable upfront: round-1
+    // hits at step 2 and short-circuits the pipeline. Discovery, the
+    // accessibility probe, gates, and round-2 all never run.
+    // Asserting on the cache tracker proves only ONE cache read fired
+    // (the round-1 read) — a regression that re-issued the round-2
+    // read for an already-hit-at-round-1 input would show two gets.
+    const tracker: CallTracker = { doCalls: 0 };
+    const cacheTracker: CacheTracker = { gets: [], puts: [] };
+    const env = makeEnv({
+      tracker,
+      cacheTracker,
+      cacheContent: {
+        'scores/dotfiles/0.4.0.json': {
+          spec_version: '0.4.0',
+          anc_version: '0.3.1',
+          tool_version: '1.0.0',
+          scorecard: {
+            tool: { name: 'dotfiles', binary: 'dotfiles', version: '1.0.0' },
+            score: { value: 75 },
+          },
+        },
+      },
+    });
+    const res = await handleScore(postScore('npm install -g dotfiles'), env);
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as { scorecard: { score: { value: number } } };
+    expect(body.scorecard.score.value).toBe(75);
+    expect(tracker.doCalls).toBe(0);
+    // EXACTLY one cache read — round-1. No round-2.
+    expect(cacheTracker.gets).toEqual(['scores/dotfiles/0.4.0.json']);
+  });
+
+  test('round-1 miss + discovery success + round-2 miss → DO dispatched, tier=live', async () => {
+    // github-url-without-hint: round-1 misses (no binary). Discovery
+    // resolves to a release asset. Round-2 reads but the cache is
+    // empty — DO is dispatched and runs the live scoring path.
+    const tracker: CallTracker = { doCalls: 0 };
+    const cacheTracker: CacheTracker = { gets: [], puts: [] };
+    const env = makeEnv({
+      tracker,
+      cacheTracker,
+      releaseAssets: {
+        'openclaw/gogcli': {
+          name: 'gog_linux_x86_64.tar.gz',
+          url: 'https://example.com/gog_linux_x86_64.tar.gz',
+        },
+      },
+      doResponse: {
+        scorecard: { tool: { name: 'gog', binary: 'gog', version: '0.4.2' }, score: { value: 88 } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/openclaw/gogcli'), env);
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as { scorecard: { score: { value: number } } };
+    expect(body.scorecard.score.value).toBe(88);
+    // DO ran — live tier.
+    expect(tracker.doCalls).toBe(1);
+    // Round-2 read fired (and missed).
+    expect(cacheTracker.gets).toContain('scores/gogcli/0.4.0.json');
+  });
+
+  test('round-1 miss + chain_no_resolve → no round-2 attempt (nothing to look up)', async () => {
+    // github-url-without-hint paste where discovery bounces
+    // chain_no_resolve: there's no `spec.binary` to key on, so
+    // round-2 must not fire. The pipeline bounces 404 at the Worker
+    // tier without any post-discovery cache read.
+    const tracker: CallTracker = { doCalls: 0 };
+    const cacheTracker: CacheTracker = { gets: [], puts: [] };
+    const env = makeEnv({ tracker, cacheTracker });
+    const res = await handleScore(postScore('brettdavies/dotfiles'), env);
+    expect(res.status).toBe(404);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('chain_no_resolve');
+    expect(tracker.doCalls).toBe(0);
+    // No round-2 attempt — the only cache get is the round-1
+    // structural short-circuit (lookupScorecard with no binary derives
+    // null and returns miss without I/O). In practice this means the
+    // tracker stays empty: round-1 doesn't reach the R2 layer either.
+    // A round-2 read for `scores/<anything>/0.4.0.json` MUST NOT
+    // appear in gets.
+    expect(cacheTracker.gets).toEqual([]);
+  });
+
+  test('branch URL → skips round-2 entirely (git-clone has no cache key)', async () => {
+    // Branch-scoped paste: spec.pm === 'git-clone'. Step 6.5 must
+    // skip the read because branch-scoped scores aren't cached
+    // (caching under the bare binary name would clobber the
+    // default-branch scorecard).
+    const tracker: CallTracker = { doCalls: 0 };
+    const cacheTracker: CacheTracker = { gets: [], puts: [] };
+    const env = makeEnv({
+      tracker,
+      cacheTracker,
+      // Even prefilling the cache under the repo name shouldn't
+      // matter — branch URLs bypass step 6.5 regardless.
+      cacheContent: {
+        'scores/gping/0.4.0.json': {
+          spec_version: '0.4.0',
+          anc_version: '0.3.1',
+          tool_version: '1.0.0',
+          scorecard: { tool: { name: 'gping', binary: 'gping', version: '1.0.0' }, score: { value: 99 } },
+        },
+      },
+      doResponse: {
+        scorecard: { tool: { name: 'gping', binary: 'gping', version: null }, score: { value: 50 } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/orf/gping/tree/master'), env);
+    expect(res.status).toBe(200);
+    // The live DO ran — score 50 from the DO, not 99 from cache.
+    const body = (await res.json()) as { scorecard: { score: { value: number } } };
+    expect(body.scorecard.score.value).toBe(50);
+    expect(tracker.doCalls).toBe(1);
+    // No round-2 read for the gping key (or any key) — branch URLs
+    // bypass step 6.5 entirely. Round-1 also skipped (branch URLs
+    // skip both tiers).
+    expect(cacheTracker.gets).toEqual([]);
+  });
+
+  test('?fromCache=false → skips BOTH cache checks, DO always dispatched', async () => {
+    // Operator escape hatch: ?fromCache=false applies uniformly to
+    // round-1 and round-2. Even with the round-2 cache prefilled
+    // with a matching entry, the DO must dispatch and run live.
+    const tracker: CallTracker = { doCalls: 0 };
+    const cacheTracker: CacheTracker = { gets: [], puts: [] };
+    const env = makeEnv({
+      tracker,
+      cacheTracker,
+      releaseAssets: {
+        'openclaw/gogcli': {
+          name: 'gog_linux_x86_64.tar.gz',
+          url: 'https://example.com/gog_linux_x86_64.tar.gz',
+        },
+      },
+      cacheContent: {
+        'scores/gogcli/0.4.0.json': {
+          spec_version: '0.4.0',
+          anc_version: '0.3.1',
+          tool_version: '0.4.2',
+          scorecard: { tool: { name: 'gog', binary: 'gog' }, score: { value: 91 } },
+        },
+      },
+      doResponse: {
+        scorecard: { tool: { name: 'gog', binary: 'gog' }, score: { value: 42 } },
+        anc_version: '0.3.1',
+      },
+    });
+    const req = new Request('https://anc.dev/api/score?fromCache=false', {
+      method: 'POST',
+      headers: { 'content-type': 'application/json' },
+      body: JSON.stringify({ input: 'https://github.com/openclaw/gogcli', turnstile_token: 'tok' }),
+    });
+    const res = await handleScore(req, env);
+    expect(res.status).toBe(200);
+    // The live DO ran — score 42, not the cached 91.
+    const body = (await res.json()) as { scorecard: { score: { value: number } } };
+    expect(body.scorecard.score.value).toBe(42);
+    expect(tracker.doCalls).toBe(1);
+    // No cache READS fired (neither round-1 nor round-2). The
+    // post-DO cache WRITE may still fire — that's the documented
+    // ?fromCache=false behavior (skip read, allow write).
+    expect(cacheTracker.gets).toEqual([]);
+  });
+
+  test('telemetry: cache_pre_attempted, cache_pre_hit, cache_post_attempted, cache_post_hit all logged per request', async () => {
+    // One structured log line per request, scope `score.tier`, with
+    // all four cache flags + tier + binary + input_kind. Spy on
+    // console.log and verify the shape for two representative paths:
+    // a round-2 hit (the new code path this commit adds) and a round-1
+    // hit (the existing pre-discovery path).
+    const originalLog = console.log;
+    const logs: string[] = [];
+    console.log = (...args: unknown[]) => {
+      const first = args[0];
+      if (typeof first === 'string') logs.push(first);
+    };
+    try {
+      // (a) round-2 hit on a github-url-without-hint.
+      {
+        const env = makeEnv({
+          releaseAssets: {
+            'openclaw/gogcli': {
+              name: 'gog_linux_x86_64.tar.gz',
+              url: 'https://example.com/gog_linux_x86_64.tar.gz',
+            },
+          },
+          cacheContent: {
+            'scores/gogcli/0.4.0.json': {
+              spec_version: '0.4.0',
+              anc_version: '0.3.1',
+              tool_version: '0.4.2',
+              scorecard: { tool: { name: 'gog', binary: 'gog' }, score: { value: 91 } },
+            },
+          },
+        });
+        logs.length = 0;
+        const res = await handleScore(postScore('https://github.com/openclaw/gogcli'), env);
+        expect(res.status).toBe(200);
+        const tierLog = logs
+          .map((l) => {
+            try {
+              return JSON.parse(l) as Record<string, unknown>;
+            } catch {
+              return null;
+            }
+          })
+          .filter((p): p is Record<string, unknown> => p !== null && p.scope === 'score.tier');
+        expect(tierLog).toHaveLength(1);
+        const entry = tierLog[0];
+        expect(entry.tier).toBe('cache_post');
+        expect(entry.cache_pre_attempted).toBe(true);
+        expect(entry.cache_pre_hit).toBe(false);
+        expect(entry.cache_post_attempted).toBe(true);
+        expect(entry.cache_post_hit).toBe(true);
+        expect(entry.binary).toBe('gogcli');
+        expect(entry.input_kind).toBe('github-url');
+      }
+
+      // (b) round-1 hit on an install-command.
+      {
+        const env = makeEnv({
+          cacheContent: {
+            'scores/dotfiles/0.4.0.json': {
+              spec_version: '0.4.0',
+              anc_version: '0.3.1',
+              tool_version: '1.0.0',
+              scorecard: { tool: { name: 'dotfiles', binary: 'dotfiles', version: '1.0.0' } },
+            },
+          },
+        });
+        logs.length = 0;
+        const res = await handleScore(postScore('npm install -g dotfiles'), env);
+        expect(res.status).toBe(200);
+        const tierLog = logs
+          .map((l) => {
+            try {
+              return JSON.parse(l) as Record<string, unknown>;
+            } catch {
+              return null;
+            }
+          })
+          .filter((p): p is Record<string, unknown> => p !== null && p.scope === 'score.tier');
+        expect(tierLog).toHaveLength(1);
+        const entry = tierLog[0];
+        expect(entry.tier).toBe('cache_pre');
+        expect(entry.cache_pre_attempted).toBe(true);
+        expect(entry.cache_pre_hit).toBe(true);
+        // Round-2 not attempted: round-1 short-circuited the pipeline.
+        expect(entry.cache_post_attempted).toBe(false);
+        expect(entry.cache_post_hit).toBe(false);
+        expect(entry.binary).toBe('dotfiles');
+        expect(entry.input_kind).toBe('install-command');
+      }
+    } finally {
+      console.log = originalLog;
+    }
+  });
+});
diff --git a/tests/score-handler-share-url.test.ts b/tests/score-handler-share-url.test.ts
new file mode 100644
index 0000000..0c0ec87
--- /dev/null
+++ b/tests/score-handler-share-url.test.ts
@@ -0,0 +1,250 @@
+// Integration: /api/score sets `share_url` to `/score/live/<binary>` on
+// inline-scorecard success branches; omits it for registry_hit (which
+// carries scorecard_url) and for github-url-without-hint live runs.
+//
+// The share URL is derived from the cache-tier binary, so the same key
+// the DO + cached lookup write to is the key the share page reads from.
+
+import { beforeEach, describe, expect, test } from 'bun:test';
+import { _resetIndexCache, handleScore, type ScoreEnv } from '../src/worker/score/handler';
+import { _resetKillSwitchCache } from '../src/worker/score/kill-switch';
+
+const REGISTRY_INDEX = {
+  by_slug: {
+    ripgrep: {
+      name: 'ripgrep',
+      binary: 'rg',
+      install: 'cargo install ripgrep',
+      version: '14.1.0',
+      anc_version: '0.3.1',
+      scorecard_url: '/score/ripgrep',
+      score_pct: 92,
+    },
+    bat: {
+      name: 'bat',
+      binary: 'bat',
+      install: 'cargo install bat',
+      version: '0.26.1',
+      anc_version: '0.3.1',
+      scorecard_url: '/score/bat',
+      score_pct: 78,
+    },
+  },
+  by_owner_repo: {},
+};
+
+const HINTS_INDEX = {
+  by_owner_repo: {
+    'Aider-AI/aider': { pm: 'pip', package: 'aider-chat', binary: 'aider' },
+  },
+};
+
+function makeEnv(cacheContent: Record<string, unknown> = {}): ScoreEnv & { __cacheStore: Map<string, string> } {
+  const cacheStore = new Map<string, string>();
+  for (const [k, v] of Object.entries(cacheContent)) {
+    cacheStore.set(k, typeof v === 'string' ? v : JSON.stringify(v));
+  }
+  const cacheStub = {
+    async get(key: string) {
+      const raw = cacheStore.get(key);
+      if (raw === undefined) return null;
+      return {
+        async json() {
+          return JSON.parse(raw);
+        },
+        async text() {
+          return raw;
+        },
+      };
+    },
+    async put(key: string, value: unknown) {
+      cacheStore.set(key, typeof value === 'string' ? value : String(value));
+    },
+    async delete(key: string) {
+      cacheStore.delete(key);
+    },
+  };
+
+  return {
+    ASSETS: {
+      async fetch(req: Request | string): Promise<Response> {
+        const url = typeof req === 'string' ? req : req.url;
+        const path = new URL(url).pathname;
+        if (path === '/registry-index.json') {
+          return new Response(JSON.stringify(REGISTRY_INDEX), { status: 200 });
+        }
+        if (path === '/discovery-hints-index.json') {
+          return new Response(JSON.stringify(HINTS_INDEX), { status: 200 });
+        }
+        return new Response('not found', { status: 404 });
+      },
+    } as Fetcher,
+    SCORE: {} as DurableObjectNamespace,
+    SCORE_KV: {
+      async get() {
+        return null;
+      },
+    } as unknown as KVNamespace,
+    SCORE_CACHE: cacheStub as unknown as R2Bucket,
+    SCORE_LIMITER: {
+      async limit() {
+        return { success: true };
+      },
+    },
+    SCORE_LIMITER_IP: {
+      async limit() {
+        return { success: true };
+      },
+    },
+    SCORE_TELEMETRY: { writeDataPoint() {} },
+    TURNSTILE_SECRET: 'test',
+    SESSION_HMAC_SECRET: 'test-hmac-secret-long-enough',
+    __cacheStore: cacheStore,
+  } as ScoreEnv & { __cacheStore: Map<string, string> };
+}
+
+function postScore(input: string): Request {
+  return new Request('https://anc.dev/api/score', {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ input, turnstile_token: 'tok' }),
+  });
+}
+
+beforeEach(() => {
+  _resetIndexCache();
+  _resetKillSwitchCache();
+});
+
+describe('/api/score — share_url derivation', () => {
+  // `cargo install uncurated-tool` → parser binary='uncurated-tool' →
+  // cache key scores/uncurated-tool/<SPEC_VERSION>.json. share_url should
+  // be the matching /score/live/uncurated-tool URL.
+  //
+  // Deliberately fictional package name: NOT in this file's REGISTRY_INDEX
+  // (ripgrep + bat are curated there), so the install-command cross-check
+  // (registry-lookup.ts) doesn't intercept and the input flows through to
+  // the cache tier — which is what these share_url tests need to exercise.
+  const CACHED_KEY = 'scores/uncurated-tool/0.4.0.json';
+  const CACHED_PAYLOAD = {
+    spec_version: '0.4.0',
+    anc_version: '0.3.1',
+    tool_version: '0.1.0',
+    scorecard: { badge: { score_pct: 70, eligible: false }, results: [] },
+  };
+
+  test('cached install-command hit: share_url = /score/live/<binary>', async () => {
+    const env = makeEnv({ [CACHED_KEY]: CACHED_PAYLOAD });
+    const res = await handleScore(postScore('cargo install uncurated-tool'), env);
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as { share_url?: string; scorecard: unknown };
+    expect(body.share_url).toBe('/score/live/uncurated-tool');
+  });
+
+  test('cached install-command hit: share_url stable across requests', async () => {
+    const env = makeEnv({ [CACHED_KEY]: CACHED_PAYLOAD });
+    const r1 = await handleScore(postScore('cargo install uncurated-tool'), env);
+    const r2 = await handleScore(postScore('cargo install uncurated-tool'), env);
+    const b1 = (await r1.json()) as { share_url?: string };
+    const b2 = (await r2.json()) as { share_url?: string };
+    // Same binary → same share URL. This is the design improvement over
+    // session-id minting: shareable URLs map to scored binaries, not to
+    // request instances.
+    expect(b1.share_url).toBe('/score/live/uncurated-tool');
+    expect(b2.share_url).toBe('/score/live/uncurated-tool');
+  });
+
+  test('registry_hit does NOT carry share_url (scorecard_url is the share surface)', async () => {
+    const env = makeEnv();
+    const res = await handleScore(postScore('ripgrep'), env);
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as {
+      share_url?: string;
+      scorecard: { kind?: string; scorecard_url?: string; score_pct?: number | null };
+    };
+    expect(body.share_url).toBeUndefined();
+    expect(body.scorecard.kind).toBe('registry_hit');
+    expect(body.scorecard.scorecard_url).toBe('/score/ripgrep');
+  });
+
+  test('registry_hit carries score_pct for the curated-reward UX', async () => {
+    const env = makeEnv();
+    const res = await handleScore(postScore('ripgrep'), env);
+    const body = (await res.json()) as {
+      scorecard: { kind?: string; score_pct?: number | null };
+    };
+    expect(body.scorecard.kind).toBe('registry_hit');
+    expect(body.scorecard.score_pct).toBe(92);
+  });
+
+  test('install-command resolving to a curated tool returns registry_hit, not live (bat fix)', async () => {
+    // `cargo install bat` parses to binary='bat'. With the install-command
+    // binary cross-check against by_slug in lookupRegistry,
+    // this should hit by_slug.bat and return registry_hit, NOT fall through
+    // to the cache + live path. Pre-fix behavior would have run the
+    // sandbox; post-fix is instant.
+    const env = makeEnv();
+    const res = await handleScore(postScore('cargo install bat'), env);
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as {
+      scorecard: { kind?: string; scorecard_url?: string; score_pct?: number | null };
+      share_url?: string;
+    };
+    expect(body.scorecard.kind).toBe('registry_hit');
+    expect(body.scorecard.scorecard_url).toBe('/score/bat');
+    expect(body.scorecard.score_pct).toBe(78);
+    // No share_url: registry_hit uses scorecard_url, not /live-score.
+    expect(body.share_url).toBeUndefined();
+  });
+
+  test('github-url with hint: share_url derives from hint.binary', async () => {
+    // Aider-AI/aider has a hint → binary='aider' → cache key
+    // scores/aider/<SPEC_VERSION>.json. Prefill that key so the cached
+    // branch fires.
+    const env = makeEnv({
+      'scores/aider/0.4.0.json': {
+        spec_version: '0.4.0',
+        anc_version: '0.3.1',
+        tool_version: '0.50.0',
+        scorecard: { badge: { score_pct: 80, eligible: true }, results: [] },
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/Aider-AI/aider'), env);
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as { share_url?: string };
+    expect(body.share_url).toBe('/score/live/aider');
+  });
+
+  test('github-url with hint: case-insensitive matching (hintsIndex)', async () => {
+    // Lowercase repo path should match the case-preserved hint
+    // ('Aider-AI/aider' in HINTS_INDEX).
+    const env = makeEnv({
+      'scores/aider/0.4.0.json': {
+        spec_version: '0.4.0',
+        anc_version: '0.3.1',
+        tool_version: '0.50.0',
+        scorecard: { badge: { score_pct: 80, eligible: true }, results: [] },
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/aider-ai/aider'), env);
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as { share_url?: string };
+    expect(body.share_url).toBe('/score/live/aider');
+  });
+
+  test('go-install command: share_url uses last-segment binary derivation', async () => {
+    // `go install github.com/user/tool@latest` → parser binary='tool'.
+    const env = makeEnv({
+      'scores/sqlc/0.4.0.json': {
+        spec_version: '0.4.0',
+        anc_version: '0.3.1',
+        tool_version: '1.27.0',
+        scorecard: { badge: { score_pct: 75, eligible: false }, results: [] },
+      },
+    });
+    const res = await handleScore(postScore('go install github.com/sqlc-dev/sqlc@latest'), env);
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as { share_url?: string };
+    expect(body.share_url).toBe('/score/live/sqlc');
+  });
+});
diff --git a/tests/score-handler.test.ts b/tests/score-handler.test.ts
new file mode 100644
index 0000000..bda012d
--- /dev/null
+++ b/tests/score-handler.test.ts
@@ -0,0 +1,1024 @@
+// /api/score handler orchestration tests.
+//
+// Exercises the full pipeline against stubbed bindings (ASSETS / DO / KV
+// / rate-limit / Turnstile fetcher). Each test reaches one branch of the
+// handler and asserts on status + envelope shape + response-triad presence.
+//
+// DO mock fidelity history (2026-05-15):
+//
+//   An earlier stub returned `{error: 'sandbox_stub_until_u6'}` from a
+//   hand-rolled `.fetch()` mock that bypassed the binding-boundary check
+//   the production runtime enforces. PR #93 shipped a real DO class with
+//   no `fetch()` method and the first staging POST threw `Handler does
+//   not export a fetch() function` (Cloudflare error 1101). The mock had
+//   a `.fetch` property; the real DO didn't.
+//
+//   This file tightens the mock by typing the stub's fetch handler via
+//   `Sandbox['fetch']` so any future Sandbox class that loses or renames
+//   `fetch` is a compile error here, not a first-deploy 5xx. See
+//   docs/solutions/integration-issues/cloudflare-workers-do-mock-must-mirror-binding-shape-2026-05-15.md
+//   for the full pattern + prevention recipe.
+
+import { beforeEach, describe, expect, test } from 'bun:test';
+import type { Sandbox } from '../src/worker/score/do';
+import { _resetIndexCache, handleScore, type ScoreEnv } from '../src/worker/score/handler';
+import { _resetKillSwitchCache } from '../src/worker/score/kill-switch';
+
+// ---------------------------------------------------------------------------
+// Fixtures
+// ---------------------------------------------------------------------------
+
+const REGISTRY_INDEX = {
+  by_slug: {
+    ripgrep: {
+      name: 'ripgrep',
+      binary: 'rg',
+      install: 'brew install ripgrep',
+      repo: 'BurntSushi/ripgrep',
+      version: '15.1.0',
+      anc_version: '0.3.0',
+      scorecard_url: '/score/ripgrep',
+    },
+    'no-card-tool': {
+      name: 'no-card-tool',
+      binary: 'no-card-tool',
+      install: 'brew install no-card-tool',
+    },
+  },
+  by_owner_repo: {
+    'BurntSushi/ripgrep': {
+      name: 'ripgrep',
+      binary: 'rg',
+      install: 'brew install ripgrep',
+      repo: 'BurntSushi/ripgrep',
+      version: '15.1.0',
+      anc_version: '0.3.0',
+      scorecard_url: '/score/ripgrep',
+    },
+  },
+};
+
+const HINTS_INDEX = {
+  by_owner_repo: {
+    // Mirrors the shape build/registry-index.mjs emits at
+    // dist/discovery-hints-index.json. A hint tells the live-scoring path
+    // which install spec (pm+pkg+binary) to use for a non-registry
+    // github-url, so the discovery chain is skipped on hit. For cache-
+    // tier tests, the `binary` field is the cache-key derivation source.
+    'Aider-AI/aider': { pm: 'pip', package: 'aider-chat', binary: 'aider' },
+  },
+};
+
+type CallTracker = { doCalls: number };
+
+export type TelemetryEvent = { blobs?: (string | null)[]; doubles?: (number | null)[]; indexes?: string[] };
+
+type StubOverrides = Partial<{
+  kvDisabled: boolean;
+  turnstileSecret: string;
+  hmacSecret: string;
+  turnstileResponse: { success: boolean };
+  doResponse: unknown;
+  doStatus: number;
+  rateLimit: boolean;
+  ipRateLimit: boolean;
+  // Prefill SCORE_CACHE with these payloads (key → JSON-encoded body).
+  cacheContent: Record<string, unknown>;
+  // If true, the SCORE_CACHE.get stub throws — exercises the
+  // best-effort read-failure path in cache.get.
+  cacheThrows: boolean;
+  // Optional tracker so cache-tier tests can assert the DO was NOT
+  // dispatched. Mutated in place by the stub fetch.
+  tracker: CallTracker;
+  // Shared cache store passed by the caller. When provided, the
+  // SCORE_CACHE stub uses it directly so a single test can interleave
+  // prefill / inspect / observe-writes operations across multiple
+  // handler invocations. The store survives across `handleScore()` calls
+  // sharing the same env.
+  cacheStore: Map<string, string>;
+  // SCORE_TELEMETRY (Workers Analytics Engine) sink. When provided,
+  // every writeDataPoint call's payload is appended to this array so
+  // assertion-heavy telemetry tests can observe what the handler
+  // recorded. Absent → calls are silently dropped (matches AE's
+  // production write-only behavior).
+  telemetryEvents: TelemetryEvent[];
+  // When true, SCORE_TELEMETRY.writeDataPoint throws. Exercises the
+  // graceful-degradation path in recordScoreEvent.
+  telemetryThrows: boolean;
+  // When true, env.SCORE is omitted from the returned ScoreEnv.
+  // Mirrors the mid-rollback Worker state (between v2-drop-sandbox and
+  // v3-restore-sandbox) where the DO binding is gone. Exercises the
+  // binding-presence guard in handler.ts that returns a typed
+  // sandbox_unavailable 503 instead of letting getRandom() throw and
+  // surface as Cloudflare error 1101.
+  noScoreBinding: boolean;
+}>;
+
+export type ScoreTestEnvOverrides = StubOverrides;
+
+export function makeEnv(overrides: StubOverrides = {}): ScoreEnv {
+  const kvDisabled = overrides.kvDisabled ?? false;
+  const turnstileSecret = overrides.turnstileSecret ?? 'test-turnstile-secret';
+  const hmacSecret = overrides.hmacSecret ?? 'test-hmac-secret-please';
+  const turnstileResponse = overrides.turnstileResponse ?? { success: true };
+  const doResponse = overrides.doResponse ?? { error: 'sandbox_stub_until_u6' };
+  const doStatus = overrides.doStatus ?? 200;
+  const rateLimit = overrides.rateLimit ?? true;
+  const ipRateLimit = overrides.ipRateLimit ?? true;
+
+  const stubKv = {
+    async get(key: string) {
+      if (key === 'scoring_disabled') return kvDisabled ? 'true' : null;
+      return null;
+    },
+  };
+
+  const tracker = overrides.tracker;
+  // Type the stub's fetch via `Sandbox['fetch']` so any future Sandbox
+  // class that loses or renames `fetch` (or changes its signature) is a
+  // TypeScript compile error AND a runtime invocation error in this
+  // file. Closes the drift class that PR #93 hit. See file header.
+  const stubFetch: Sandbox['fetch'] = async (_req) => {
+    if (tracker) tracker.doCalls += 1;
+    return new Response(JSON.stringify(doResponse), {
+      status: doStatus,
+      headers: { 'content-type': 'application/json' },
+    });
+  };
+  const stubDo = {
+    idFromName(_name: string) {
+      return { id: 'stub' };
+    },
+    get(_id: unknown) {
+      return { fetch: stubFetch };
+    },
+  };
+
+  const turnstileFetcher = async () =>
+    new Response(JSON.stringify(turnstileResponse), {
+      status: 200,
+      headers: { 'content-type': 'application/json' },
+    });
+
+  // The handler reads globalThis.fetch for Turnstile; we monkey-patch it
+  // by swapping it on the env's symbol-keyed slot via the stub at runtime.
+  // verifyTurnstile accepts a `fetcher` override in production code but
+  // not via env, so we override globalThis.fetch for the test.
+  const originalFetch = globalThis.fetch;
+  (globalThis as { fetch: typeof fetch }).fetch = turnstileFetcher as unknown as typeof fetch;
+  // Reset after the test — Bun's afterEach scope is per-describe, so each
+  // test resets at the start of the next makeEnv() call. We bind the
+  // restore on `env` for explicit teardown if a test wants it.
+  void originalFetch;
+
+  const cacheStore = overrides.cacheStore ?? new Map<string, string>();
+  for (const [k, v] of Object.entries(overrides.cacheContent ?? {})) {
+    cacheStore.set(k, typeof v === 'string' ? v : JSON.stringify(v));
+  }
+  const cacheStub = {
+    async get(key: string) {
+      if (overrides.cacheThrows) throw new Error('r2_get_failed');
+      const raw = cacheStore.get(key);
+      if (raw === undefined) return null;
+      // Mirror R2's R2ObjectBody surface — `.json()` is the only method
+      // src/worker/score/cache.ts actually calls.
+      return {
+        async json() {
+          return JSON.parse(raw);
+        },
+        async text() {
+          return raw;
+        },
+      };
+    },
+    async put(key: string, value: unknown) {
+      cacheStore.set(key, typeof value === 'string' ? value : String(value));
+    },
+    async delete(key: string) {
+      cacheStore.delete(key);
+    },
+  };
+
+  const env: ScoreEnv = {
+    ASSETS: {
+      async fetch(req: Request | string): Promise<Response> {
+        const url = typeof req === 'string' ? req : req.url;
+        const path = new URL(url).pathname;
+        if (path === '/registry-index.json') {
+          return new Response(JSON.stringify(REGISTRY_INDEX), { status: 200 });
+        }
+        if (path === '/discovery-hints-index.json') {
+          return new Response(JSON.stringify(HINTS_INDEX), { status: 200 });
+        }
+        return new Response('not found', { status: 404 });
+      },
+    } as Fetcher,
+    SCORE_KV: stubKv as unknown as KVNamespace,
+    SCORE_CACHE: cacheStub as unknown as R2Bucket,
+    SCORE_LIMITER: {
+      async limit() {
+        return { success: rateLimit };
+      },
+    },
+    SCORE_LIMITER_IP: {
+      async limit() {
+        return { success: ipRateLimit };
+      },
+    },
+    SCORE_TELEMETRY: {
+      writeDataPoint(event: TelemetryEvent) {
+        if (overrides.telemetryThrows) throw new Error('ae_write_failed');
+        if (overrides.telemetryEvents) overrides.telemetryEvents.push(event);
+      },
+    },
+    TURNSTILE_SECRET: turnstileSecret,
+    SESSION_HMAC_SECRET: hmacSecret,
+  } as ScoreEnv;
+  if (!overrides.noScoreBinding) {
+    env.SCORE = stubDo as unknown as DurableObjectNamespace;
+  }
+  return env;
+}
+
+export function postScore(input: string, opts: { token?: string; cookie?: string; pathSuffix?: string } = {}): Request {
+  const headers: Record<string, string> = { 'content-type': 'application/json' };
+  if (opts.cookie) headers.cookie = opts.cookie;
+  return new Request(`https://anc.dev/api/score${opts.pathSuffix ?? ''}`, {
+    method: 'POST',
+    headers,
+    body: JSON.stringify({ input, turnstile_token: opts.token ?? 'tok' }),
+  });
+}
+
+export function getScore(input: string | null, pathSuffix = ''): Request {
+  const url = new URL(`https://anc.dev/api/score${pathSuffix}`);
+  if (input !== null) url.searchParams.set('input', input);
+  return new Request(url.toString(), { method: 'GET' });
+}
+
+beforeEach(() => {
+  _resetIndexCache();
+  _resetKillSwitchCache();
+});
+
+// ---------------------------------------------------------------------------
+// Method gate + input validation
+// ---------------------------------------------------------------------------
+
+describe('/api/score — method gate', () => {
+  test('DELETE → 405', async () => {
+    const res = await handleScore(new Request('https://anc.dev/api/score', { method: 'DELETE' }), makeEnv());
+    expect(res.status).toBe(405);
+  });
+});
+
+describe('/api/score — input validation', () => {
+  test('POST without input → 400 unrecognized_input', async () => {
+    const res = await handleScore(
+      new Request('https://anc.dev/api/score', {
+        method: 'POST',
+        headers: { 'content-type': 'application/json' },
+        body: JSON.stringify({ turnstile_token: 'tok' }),
+      }),
+      makeEnv(),
+    );
+    expect(res.status).toBe(400);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('unrecognized_input');
+  });
+
+  test('GET without input → 400', async () => {
+    const res = await handleScore(getScore(null), makeEnv());
+    expect(res.status).toBe(400);
+  });
+
+  test('POST with malformed body → 400', async () => {
+    const res = await handleScore(
+      new Request('https://anc.dev/api/score', {
+        method: 'POST',
+        headers: { 'content-type': 'application/json' },
+        body: 'not json',
+      }),
+      makeEnv(),
+    );
+    expect(res.status).toBe(400);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Registry-fast-path (unmetered)
+// ---------------------------------------------------------------------------
+
+describe('/api/score — registry fast-path', () => {
+  test('POST {input: "ripgrep"} → 200 registry_hit with response triad', async () => {
+    const res = await handleScore(postScore('ripgrep'), makeEnv());
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as {
+      scorecard: { kind: string; scorecard_url: string };
+      spec_version: string;
+      anc_version: string;
+      checker_url: string;
+    };
+    expect(body.scorecard.kind).toBe('registry_hit');
+    expect(body.scorecard.scorecard_url).toBe('/score/ripgrep');
+    expect(body.anc_version).toBe('0.3.0');
+    expect(body.spec_version).toBeTruthy();
+    expect(body.checker_url).toBeTruthy();
+  });
+
+  test('GET ?input=ripgrep → 200 (read-only path) with cache-friendly headers', async () => {
+    const res = await handleScore(getScore('ripgrep'), makeEnv());
+    expect(res.status).toBe(200);
+    expect(res.headers.get('Cache-Control')).toBe('public, max-age=300');
+  });
+
+  test('GET ?input=https://github.com/BurntSushi/ripgrep → 200 (URL → registry)', async () => {
+    const res = await handleScore(getScore('https://github.com/BurntSushi/ripgrep'), makeEnv());
+    expect(res.status).toBe(200);
+  });
+
+  test('GET ?input=unknown → 404 chain_no_resolve (GET is registry-only)', async () => {
+    // 'unknown-tool' fails validate (not a slug, not a URL, no prefix) →
+    // unrecognized_input (400). Use a parseable URL instead.
+    const res = await handleScore(getScore('cargo install foo-cli'), makeEnv());
+    expect(res.status).toBe(404);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('chain_no_resolve');
+  });
+
+  test('registry entry without scorecard_url is NOT a fast-path hit', async () => {
+    // no-card-tool has no version/anc_version/scorecard_url, so the
+    // handler falls through to the live path. POST will exercise the
+    // full pipeline; GET will fail with chain_no_resolve at the GET gate.
+    const res = await handleScore(getScore('no-card-tool'), makeEnv());
+    expect(res.status).toBe(404);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Kill switch + Turnstile + rate-limit + DO stub (POST-only chain)
+// ---------------------------------------------------------------------------
+
+describe('/api/score — POST pipeline error paths', () => {
+  test('kill switch on → 503 scoring_disabled with Retry-After: 3600', async () => {
+    const res = await handleScore(postScore('cargo install foo-cli'), makeEnv({ kvDisabled: true }));
+    expect(res.status).toBe(503);
+    expect(res.headers.get('Retry-After')).toBe('3600');
+  });
+
+  test('turnstile rejection → 400 turnstile_failed', async () => {
+    const res = await handleScore(
+      postScore('cargo install foo-cli'),
+      makeEnv({ turnstileResponse: { success: false } }),
+    );
+    expect(res.status).toBe(400);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('turnstile_failed');
+  });
+
+  test('rate-limited → 429 with Retry-After: 60', async () => {
+    const res = await handleScore(postScore('cargo install foo-cli'), makeEnv({ rateLimit: false }));
+    expect(res.status).toBe(429);
+    expect(res.headers.get('Retry-After')).toBe('60');
+  });
+
+  test('per-IP fallback limiter triggers 429 even when session limiter passes', async () => {
+    const res = await handleScore(postScore('cargo install foo-cli'), makeEnv({ rateLimit: true, ipRateLimit: false }));
+    expect(res.status).toBe(429);
+  });
+
+  test('DO stub envelope passthrough → 503 sandbox_stub_until_u6 (defense-in-depth)', async () => {
+    // Defense-in-depth: if the production DO binding ever points back at
+    // the legacy sandbox-stub class (botched rollback, misconfigured
+    // wrangler.jsonc), the handler still bounces with the sandbox_stub
+    // envelope instead of leaking the raw stub error to the user. The
+    // isStubError() check in handler.ts is what makes this safe.
+    const res = await handleScore(
+      postScore('cargo install foo-cli'),
+      makeEnv({ doResponse: { error: 'sandbox_stub_until_u6' } }),
+    );
+    expect(res.status).toBe(503);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('sandbox_stub_until_u6');
+  });
+
+  test('missing env.SCORE binding → 503 sandbox_unavailable (mid-rollback guard)', async () => {
+    // Without the binding-presence guard, getRandom() throws on the
+    // undefined env.SCORE namespace and the Worker exception surfaces
+    // as Cloudflare error 1101 (a generic page, no JSON envelope). The
+    // guard converts that into a typed 503.
+    const res = await handleScore(postScore('cargo install foo-cli'), makeEnv({ noScoreBinding: true }));
+    expect(res.status).toBe(503);
+    const body = (await res.json()) as { error: { code: string }; spec_version: string; checker_url: string };
+    expect(body.error.code).toBe('sandbox_unavailable');
+    expect(body.spec_version).toBeDefined();
+    expect(body.checker_url).toBeDefined();
+  });
+
+  test('DO returns valid scorecard envelope → 200 with response triad', async () => {
+    // Live success path: DO returns {scorecard, anc_version} from
+    // sandbox-exec.score(). The handler wraps it into the response shape
+    // with spec_version + checker_url. This is the test that pins the
+    // DO → handler envelope contract.
+    const res = await handleScore(
+      postScore('cargo install foo-cli'),
+      makeEnv({
+        doResponse: {
+          scorecard: { tool: { name: 'bar', binary: 'bar' }, score: { value: 73 } },
+          anc_version: '0.3.1',
+        },
+      }),
+    );
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as {
+      spec_version: string;
+      anc_version: string;
+      checker_url: string;
+      scorecard: { tool: { name: string } };
+    };
+    expect(body.scorecard.tool.name).toBe('bar');
+    expect(body.spec_version).toBeTruthy();
+    expect(body.checker_url).toBeTruthy();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Session cookie issue + verify
+// ---------------------------------------------------------------------------
+
+describe('/api/score — session cookie', () => {
+  test('first POST issues Set-Cookie with __Host-anc-session', async () => {
+    const res = await handleScore(postScore('cargo install foo-cli'), makeEnv());
+    const cookie = res.headers.get('Set-Cookie');
+    expect(cookie).toContain('__Host-anc-session=');
+    expect(cookie).toContain('HttpOnly');
+    expect(cookie).toContain('Secure');
+    expect(cookie).toContain('SameSite=Lax');
+    expect(cookie).toContain('Path=/');
+  });
+
+  test('returning request with valid cookie does NOT re-issue', async () => {
+    const env = makeEnv();
+    const first = await handleScore(postScore('cargo install foo-cli'), env);
+    const cookie = first.headers.get('Set-Cookie');
+    expect(cookie).toBeTruthy();
+    if (!cookie) return;
+    // Extract the cookie name=value pair (Set-Cookie includes attributes after `;`)
+    const cookiePair = cookie.split(';')[0];
+
+    const second = await handleScore(postScore('cargo install foo-cli', { cookie: cookiePair }), env);
+    expect(second.headers.get('Set-Cookie')).toBeNull();
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Service misconfiguration (fail-fast on missing secrets)
+// ---------------------------------------------------------------------------
+
+describe('/api/score — service misconfiguration', () => {
+  test('missing TURNSTILE_SECRET on POST → 500 service_misconfigured', async () => {
+    const env = makeEnv({ turnstileSecret: '' });
+    const res = await handleScore(postScore('cargo install foo-cli'), env);
+    expect(res.status).toBe(500);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('service_misconfigured');
+  });
+
+  test('missing SESSION_HMAC_SECRET on POST → 500 service_misconfigured', async () => {
+    const env = makeEnv({ hmacSecret: '' });
+    const res = await handleScore(postScore('cargo install foo-cli'), env);
+    expect(res.status).toBe(500);
+  });
+
+  test('registry-fast-path bypass works even without secrets configured', async () => {
+    // The unmetered registry hit must not touch Turnstile or sessions,
+    // so a misconfigured Worker can still serve registry-known tools.
+    const env = makeEnv({ turnstileSecret: '', hmacSecret: '' });
+    const res = await handleScore(postScore('ripgrep'), env);
+    expect(res.status).toBe(200);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Content negotiation
+// ---------------------------------------------------------------------------
+
+describe('/api/score — content negotiation', () => {
+  test('GET /api/score.json?input=ripgrep → JSON', async () => {
+    const res = await handleScore(getScore('ripgrep', '.json'), makeEnv());
+    expect(res.headers.get('Content-Type')).toContain('application/json');
+  });
+
+  test('GET /api/score.md?input=ripgrep → markdown', async () => {
+    const res = await handleScore(getScore('ripgrep', '.md'), makeEnv());
+    expect(res.headers.get('Content-Type')).toContain('text/markdown');
+    const body = await res.text();
+    expect(body).toContain('# anc.dev');
+  });
+
+  test('Accept: text/markdown;q=0.1, application/json;q=0.9 → JSON (q-value, not substring)', async () => {
+    const url = new URL('https://anc.dev/api/score');
+    url.searchParams.set('input', 'ripgrep');
+    const req = new Request(url.toString(), {
+      method: 'GET',
+      headers: { accept: 'text/markdown;q=0.1, application/json;q=0.9' },
+    });
+    const res = await handleScore(req, makeEnv());
+    expect(res.headers.get('Content-Type')).toContain('application/json');
+  });
+
+  test('Accept: text/markdown,application/json;q=0.5 → markdown', async () => {
+    const url = new URL('https://anc.dev/api/score');
+    url.searchParams.set('input', 'ripgrep');
+    const req = new Request(url.toString(), {
+      method: 'GET',
+      headers: { accept: 'text/markdown,application/json;q=0.5' },
+    });
+    const res = await handleScore(req, makeEnv());
+    expect(res.headers.get('Content-Type')).toContain('text/markdown');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// R2 cache tier
+// ---------------------------------------------------------------------------
+
+// The cache key uses SPEC_VERSION (build-time constant) as the
+// anc-version proxy. The constant currently reads 0.4.0 from
+// src/worker/spec-version.gen.ts; if it bumps, update the keys here.
+//
+// `uncurated-tool` is a deliberately-fictional package name used as the
+// cache-tier exemplar — clearly NOT in the test fixture's
+// REGISTRY_INDEX.by_slug, so the install-command-binary cross-check
+// (registry-lookup.ts) doesn't intercept and the input flows through to
+// the cache tier as intended. Avoid swapping to a real CLI tool name
+// here: tests stub the DO response so the package never actually
+// installs, but a real name in test code can mislead a future reader
+// into pasting it as a live-demo example where it would either fail
+// (no real package) or run a slow install. Fictional name = self-
+// documenting "this is fixture data, not a real package".
+const CACHE_KEY_UNCURATED = 'scores/uncurated-tool/0.4.0.json';
+
+const CACHED_UNCURATED_PAYLOAD = {
+  spec_version: '0.4.0',
+  anc_version: '0.3.1',
+  tool_version: '3.04',
+  scorecard: { tool: { name: 'uncurated-tool', binary: 'uncurated-tool', version: '3.04' }, score: { value: 92 } },
+};
+
+describe('/api/score — R2 cache tier', () => {
+  test('install-command + R2 hit → 200 cached, DO never dispatched, gates bypassed', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      cacheContent: { [CACHE_KEY_UNCURATED]: CACHED_UNCURATED_PAYLOAD },
+      tracker,
+      // Hard-fail every metered gate. The cached hit must bypass all of
+      // them — proving the unmetered contract (R6 extended to cache).
+      turnstileResponse: { success: false },
+      rateLimit: false,
+      ipRateLimit: false,
+    });
+    const res = await handleScore(postScore('cargo binstall uncurated-tool'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(0);
+    const body = (await res.json()) as {
+      scorecard: { tool: { name: string }; score: { value: number } };
+      anc_version: string;
+      spec_version: string;
+      checker_url: string;
+    };
+    expect(body.scorecard.tool.name).toBe('uncurated-tool');
+    expect(body.scorecard.score.value).toBe(92);
+    expect(body.anc_version).toBe('0.3.1');
+    expect(res.headers.get('Cache-Control')).toBe('public, max-age=300');
+  });
+
+  test('install-command + R2 miss → live path runs (DO dispatched)', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      cacheContent: {},
+      tracker,
+      doResponse: {
+        scorecard: { tool: { name: 'uncurated-tool', version: '3.04' } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('cargo binstall uncurated-tool'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+    const body = (await res.json()) as { scorecard: { tool: { name: string } } };
+    expect(body.scorecard.tool.name).toBe('uncurated-tool');
+  });
+
+  test('?fromCache=false bypasses R2 read, live path runs even with cache prefilled', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      cacheContent: { [CACHE_KEY_UNCURATED]: CACHED_UNCURATED_PAYLOAD },
+      tracker,
+      doResponse: {
+        scorecard: { tool: { name: 'uncurated-tool', version: '3.04' }, score: { value: 50 } },
+        anc_version: '0.3.1',
+      },
+    });
+    const req = new Request('https://anc.dev/api/score?fromCache=false', {
+      method: 'POST',
+      headers: { 'content-type': 'application/json' },
+      body: JSON.stringify({ input: 'cargo binstall uncurated-tool', turnstile_token: 'tok' }),
+    });
+    const res = await handleScore(req, env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+    const body = (await res.json()) as { scorecard: { score: { value: number } } };
+    // The DO's scorecard (value: 50), not the cached one (value: 92).
+    expect(body.scorecard.score.value).toBe(50);
+  });
+
+  test('curated registry hit still wins over R2 cache (commit ordering)', async () => {
+    // If a curated entry AND a cached entry both exist for the same
+    // binary, the curated one must win because it points at a stable
+    // /score/<slug> page. Cached entries are launch-time live scores
+    // and should never override committed scorecards.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      // Pre-seed the cache under the slug's binary key. The registry
+      // already has ripgrep with scorecard_url + anc_version.
+      cacheContent: { 'scores/rg/0.4.0.json': CACHED_UNCURATED_PAYLOAD },
+      tracker,
+    });
+    const res = await handleScore(postScore('ripgrep'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(0);
+    const body = (await res.json()) as { scorecard: { kind?: string; scorecard_url?: string } };
+    expect(body.scorecard.kind).toBe('registry_hit');
+    expect(body.scorecard.scorecard_url).toBe('/score/ripgrep');
+  });
+
+  test('R2 read failure → treated as miss, live path runs', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      cacheThrows: true,
+      tracker,
+      doResponse: {
+        scorecard: { tool: { name: 'uncurated-tool', version: '3.04' } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('cargo binstall uncurated-tool'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+  });
+
+  test('GET install-command with cached hit returns 200 (unmetered read-only tier)', async () => {
+    // GET ?input=<install-command> normally validates fine; the existing
+    // contract was "GET only hits the registry, otherwise 404". The
+    // cache tier is also a read-only/unmetered tier, so a GET that
+    // matches a cached binary returns 200.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      cacheContent: { [CACHE_KEY_UNCURATED]: CACHED_UNCURATED_PAYLOAD },
+      tracker,
+    });
+    const res = await handleScore(getScore('cargo binstall uncurated-tool'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(0);
+  });
+
+  test('GET install-command with cache miss returns 404 (read-only contract)', async () => {
+    const env = makeEnv({ cacheContent: {} });
+    const res = await handleScore(getScore('cargo binstall uncurated-tool'), env);
+    expect(res.status).toBe(404);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('chain_no_resolve');
+  });
+
+  test('cached scorecard preserves response triad', async () => {
+    const env = makeEnv({ cacheContent: { [CACHE_KEY_UNCURATED]: CACHED_UNCURATED_PAYLOAD } });
+    const res = await handleScore(postScore('cargo binstall uncurated-tool'), env);
+    const body = (await res.json()) as {
+      spec_version: string;
+      anc_version: string;
+      checker_url: string;
+    };
+    expect(body.spec_version).toBeTruthy();
+    expect(body.anc_version).toBe('0.3.1');
+    expect(body.checker_url).toBeTruthy();
+  });
+
+  // -------------------------------------------------------------------------
+  // github-url tier (with + without hint) — covers gaps the install-command
+  // tests don't reach. Aider-AI/aider has a hint with binary='aider' so
+  // the cache key derives to scores/aider/<SPEC_VERSION>.json; an
+  // owner/repo without a hint can't derive a binary upfront and skips
+  // the cache tier entirely.
+  // -------------------------------------------------------------------------
+
+  const CACHE_KEY_AIDER = 'scores/aider/0.4.0.json';
+  const CACHED_AIDER_PAYLOAD = {
+    spec_version: '0.4.0',
+    anc_version: '0.3.1',
+    tool_version: '0.93.0',
+    scorecard: { tool: { name: 'aider', binary: 'aider', version: '0.93.0' }, score: { value: 81 } },
+  };
+
+  test('github-url with hint + R2 hit → 200 cached, DO not dispatched, gates bypassed', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      cacheContent: { [CACHE_KEY_AIDER]: CACHED_AIDER_PAYLOAD },
+      tracker,
+      turnstileResponse: { success: false },
+      rateLimit: false,
+      ipRateLimit: false,
+    });
+    const res = await handleScore(postScore('https://github.com/Aider-AI/aider'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(0);
+    const body = (await res.json()) as { scorecard: { tool: { name: string } }; anc_version: string };
+    expect(body.scorecard.tool.name).toBe('aider');
+    expect(body.anc_version).toBe('0.3.1');
+  });
+
+  test('github-url with hint + R2 miss → live path runs (DO dispatched, hint informs cache key)', async () => {
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      cacheContent: {},
+      tracker,
+      doResponse: {
+        scorecard: { tool: { name: 'aider', version: '0.93.0' } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('https://github.com/Aider-AI/aider'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+  });
+
+  test('github-url with hint is case-insensitive on owner/repo for cache lookup', async () => {
+    // Mirrors the registry-lookup case-insensitivity guarantee — a paste
+    // of `github.com/aider-ai/aider` (lowercase) must hit the same hint
+    // as `Aider-AI/aider` and therefore the same cache key.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      cacheContent: { [CACHE_KEY_AIDER]: CACHED_AIDER_PAYLOAD },
+      tracker,
+    });
+    const res = await handleScore(postScore('https://github.com/aider-ai/aider'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(0);
+  });
+
+  test('install-command with unrelated R2 entry → cache tier scoped to derived binary, live path runs', async () => {
+    // `cargo install foo-cli` parses to binary='foo-cli', cache key
+    // `scores/foo-cli/0.4.0.json`. A prefilled entry under a DIFFERENT
+    // binary's key (scores/bar/...) is unreachable from this input and
+    // the live path runs.
+    //
+    // Pre-2026-05-20 this test used a github-url without a hint to prove
+    // the same property (cache tier requires a derivable binary). After
+    // the discovery-move the equivalent github-url POST bounces at the
+    // Worker on chain_no_resolve before the DO; install-command is the
+    // shape that still reaches the DO via the cheap install-command
+    // pass-through path in resolveSpec.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      cacheContent: { 'scores/bar/0.4.0.json': CACHED_AIDER_PAYLOAD },
+      tracker,
+      doResponse: {
+        scorecard: { tool: { name: 'bar', version: '0.1.0' } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('cargo install foo-cli'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+    const body = (await res.json()) as { scorecard: { tool: { name: string } } };
+    // The DO's scorecard (tool=bar), not the prefilled cache (tool=aider).
+    expect(body.scorecard.tool.name).toBe('bar');
+  });
+
+  test('slug WITHOUT curated scorecard → cache tier skipped (no binary derivable), Worker bounces chain_no_resolve', async () => {
+    // The `no-card-tool` registry entry exists but has no `scorecard_url`
+    // / `anc_version`, so the registry tier returns a non-curated hit
+    // and the cache tier sees `kind: registry` (which deriveCacheBinary
+    // bails on — only hint kind feeds the cache for github-urls; slugs
+    // bail because there's no install spec). A prefilled-but-unreachable
+    // R2 entry must NOT be served.
+    //
+    // 2026-05-20 discovery-move: bare-slug live scoring is deferred; the
+    // Worker's resolveSpec bounces slug inputs as chain_no_resolve before
+    // the DO is reached. Pre-move the DO emitted that same bounce; now it
+    // emerges one tier earlier so the no-resolve UX is sub-second.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      // Pre-seed under the slug name — should NOT be served because
+      // deriveCacheBinary returns null for non-curated slugs.
+      cacheContent: {
+        'scores/no-card-tool/0.4.0.json': CACHED_AIDER_PAYLOAD,
+      },
+      tracker,
+    });
+    const res = await handleScore(postScore('no-card-tool'), env);
+    expect(res.status).toBe(404);
+    // DO NOT called — Worker resolveSpec bounced the slug before dispatch.
+    expect(tracker.doCalls).toBe(0);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('chain_no_resolve');
+  });
+
+  test('cache key partition: install-command binary derivation does not alias curated registry binary', async () => {
+    // `cargo binstall ripgrep` → binary='ripgrep' (parser default).
+    // The curated registry entry for slug=ripgrep has binary='rg'.
+    // The two cache keys are scores/ripgrep/* and scores/rg/* — they
+    // must NOT alias, otherwise an install-command query could pick up
+    // a stale entry written under the curated path.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      // Pre-seed ONLY under the curated 'rg' key.
+      cacheContent: { 'scores/rg/0.4.0.json': CACHED_UNCURATED_PAYLOAD },
+      tracker,
+      doResponse: {
+        scorecard: { tool: { name: 'ripgrep', version: '15.1.0' } },
+        anc_version: '0.3.1',
+      },
+    });
+    // POST install-command — key derives to scores/ripgrep/0.4.0.json,
+    // which is empty. The pre-seeded scores/rg/0.4.0.json must NOT be
+    // served (it's the curated path's key, not the install-command path's).
+    const res = await handleScore(postScore('cargo binstall uncurated-tool'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+  });
+
+  test('anc-version partition: reads under SPEC_VERSION slot, stale entries under a different slot are unreachable', async () => {
+    // SPEC_VERSION is currently '0.4.0'. A stale entry under
+    // scores/uncurated-tool/0.3.0.json (older spec) must be unreachable when
+    // the running Worker computes the key from SPEC_VERSION='0.4.0'.
+    // This pins the partition-by-version property so a future change
+    // that strips the version from the key surfaces here.
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      cacheContent: {
+        'scores/uncurated-tool/0.3.0.json': {
+          spec_version: '0.3.0',
+          anc_version: '0.2.5',
+          tool_version: '1.5.0',
+          scorecard: { tool: { name: 'uncurated-tool', version: '1.5.0' } },
+        },
+        // NO entry under 0.4.0 → cache miss.
+      },
+      tracker,
+      doResponse: {
+        scorecard: { tool: { name: 'uncurated-tool', version: '1.6.0' } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('npm install -g uncurated-tool'), env);
+    expect(res.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+    const body = (await res.json()) as { scorecard: { tool: { version: string } } };
+    // Live DO scorecard (1.6.0), not stale cache (1.5.0).
+    expect(body.scorecard.tool.version).toBe('1.6.0');
+  });
+
+  test('cached hit returns Cache-Control: public, max-age=300 for CDN-edge cooperation', async () => {
+    // The per-write Cache-Control header keeps CDN edges from
+    // over-caching while R2 lifecycle handles the long TTL.
+    const env = makeEnv({ cacheContent: { [CACHE_KEY_UNCURATED]: CACHED_UNCURATED_PAYLOAD } });
+    const res = await handleScore(postScore('cargo binstall uncurated-tool'), env);
+    expect(res.headers.get('Cache-Control')).toBe('public, max-age=300');
+  });
+
+  test('live (DO-served) responses get Cache-Control: no-store', async () => {
+    // Mirror-of-above: the live path uses JSON_HEADERS_LIVE so CDN edges
+    // don't accidentally cache an uncached miss. Pins the freshness=live
+    // vs cache-hit response-header split.
+    const env = makeEnv({
+      cacheContent: {},
+      doResponse: {
+        scorecard: { tool: { name: 'foo', version: '0.1.0' } },
+        anc_version: '0.3.1',
+      },
+    });
+    const res = await handleScore(postScore('cargo install foo-cli'), env);
+    expect(res.headers.get('Cache-Control')).toBe('no-store');
+  });
+
+  // -------------------------------------------------------------------------
+  // Cross-PM cache-key aliasing — design choice, not a bug
+  // -------------------------------------------------------------------------
+  //
+  // The cache key (`scores/{binary}/{SPEC_VERSION}.json`) intentionally
+  // OMITS the package-manager dimension. The reasoning: a binary with
+  // the same name + same anc_version produces the same scorecard
+  // regardless of which PM installed it, because `anc check` evaluates
+  // the binary on PATH and doesn't care how it got there. So `pip
+  // install foo`, `cargo binstall foo`, and `bun add -g foo` all
+  // SHOULD share a cache entry for binary='foo'.
+  //
+  // This test pins that design choice so a future change that scopes
+  // the cache key per-PM (which would be the wrong direction, because
+  // it'd waste cache budget) surfaces here.
+
+  test('cache-key aliasing: same binary across different PMs shares the same cache entry', async () => {
+    // Pre-seed under scores/foo/0.4.0.json. Both `pip install foo`
+    // (binary='foo') and `cargo binstall foo` (binary='foo') derive
+    // the same key, so both reads hit the same prefilled entry.
+    const cachedFooPayload = {
+      spec_version: '0.4.0',
+      anc_version: '0.3.1',
+      tool_version: '1.0.0',
+      scorecard: { tool: { name: 'foo', binary: 'foo', version: '1.0.0' }, score: { value: 75 } },
+    };
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      cacheContent: { 'scores/foo/0.4.0.json': cachedFooPayload },
+      tracker,
+    });
+
+    const pipRes = await handleScore(postScore('pip install foo'), env);
+    expect(pipRes.status).toBe(200);
+    const pipBody = (await pipRes.json()) as { scorecard: { score: { value: number } } };
+    expect(pipBody.scorecard.score.value).toBe(75);
+
+    const cargoRes = await handleScore(postScore('cargo binstall foo'), env);
+    expect(cargoRes.status).toBe(200);
+    const cargoBody = (await cargoRes.json()) as { scorecard: { score: { value: number } } };
+    expect(cargoBody.scorecard.score.value).toBe(75);
+
+    const bunRes = await handleScore(postScore('bun add -g foo'), env);
+    expect(bunRes.status).toBe(200);
+    const bunBody = (await bunRes.json()) as { scorecard: { score: { value: number } } };
+    expect(bunBody.scorecard.score.value).toBe(75);
+
+    // All three were cache hits — no DO dispatch happened.
+    expect(tracker.doCalls).toBe(0);
+  });
+
+  // -------------------------------------------------------------------------
+  // ?fromCache=false cache-WRITE fires
+  // -------------------------------------------------------------------------
+  //
+  // fromCache=false skips the READ tier but the design says the live
+  // run must still WRITE to cache so the next request benefits.
+  // Pinning the write half explicitly: with a fresh cache, a
+  // ?fromCache=false POST + cache-miss POST in sequence should mean
+  // the second call sees the entry the live run wrote.
+  //
+  // The DO writes to env.SCORE_CACHE.put() via writeCacheBestEffort
+  // in src/worker/score/do.ts. The handler test's mock DO doesn't
+  // actually run that code path, so we exercise the WRITE side by
+  // observing the cacheStore via the makeEnv override AND issuing the
+  // sequence end-to-end.
+
+  test('?fromCache=false fires the cache write so the next request hits the fresh entry', async () => {
+    // Shared cacheStore lets us inspect (and ALSO simulate the DO write
+    // by inserting directly — the DO would do this after success).
+    const cacheStore = new Map<string, string>();
+    const tracker: CallTracker = { doCalls: 0 };
+    const env = makeEnv({
+      cacheStore,
+      tracker,
+      doResponse: {
+        scorecard: { tool: { name: 'uncurated-tool', binary: 'uncurated-tool', version: '1.6.0' } },
+        anc_version: '0.3.1',
+      },
+    });
+
+    // First request with ?fromCache=false. The cache is empty, so read
+    // would have missed anyway, but the route through skipCache must
+    // still hit the live DO path AND the write happens.
+    const req1 = new Request('https://anc.dev/api/score?fromCache=false', {
+      method: 'POST',
+      headers: { 'content-type': 'application/json' },
+      body: JSON.stringify({ input: 'npm install -g uncurated-tool', turnstile_token: 'tok' }),
+    });
+    const res1 = await handleScore(req1, env);
+    expect(res1.status).toBe(200);
+    expect(tracker.doCalls).toBe(1);
+
+    // The handler mock DO doesn't actually run writeCacheBestEffort
+    // (the real DO does). Simulate that the DO has written the result
+    // by injecting it into the shared cacheStore — this is what
+    // writeCacheBestEffort would do after a successful score.
+    cacheStore.set(
+      'scores/uncurated-tool/0.4.0.json',
+      JSON.stringify({
+        spec_version: '0.4.0',
+        anc_version: '0.3.1',
+        tool_version: '1.6.0',
+        scorecard: {
+          tool: { name: 'uncurated-tool', binary: 'uncurated-tool', version: '1.6.0' },
+          score: { value: 92 },
+        },
+      }),
+    );
+
+    // Second request WITHOUT ?fromCache=false. The cache write should
+    // now be readable; DO should NOT dispatch again.
+    const req2 = postScore('npm install -g uncurated-tool');
+    const res2 = await handleScore(req2, env);
+    expect(res2.status).toBe(200);
+    // Tracker is still 1 from the first call; the second call hits cache.
+    expect(tracker.doCalls).toBe(1);
+    expect(res2.headers.get('Cache-Control')).toBe('public, max-age=300');
+  });
+});
diff --git a/tests/score-live-page.test.ts b/tests/score-live-page.test.ts
new file mode 100644
index 0000000..04728cc
--- /dev/null
+++ b/tests/score-live-page.test.ts
@@ -0,0 +1,405 @@
+// Worker route: GET /score/live/<binary>
+//
+// The shareable result URL renders the cached scorecard as HTML. Cache-
+// key share-URL design: the same key the DO writes to
+// (scores/<binary>/<spec>.json) is the key this route reads from. No
+// session minting; the URL is meaningful.
+
+import { describe, expect, test } from 'bun:test';
+import {
+  _resetShellTemplateCache,
+  handleLiveScorePage,
+  parseLiveScorePath,
+  parseLiveScorePathMatch,
+} from '../src/worker/score/summary-render';
+
+const SHELL_TEMPLATE = `<!doctype html>
+<html lang="en">
+<head>
+<title>{{TITLE}}</title>
+<meta name="description" content="{{DESCRIPTION}}" />
+<link rel="canonical" href="https://anc.dev{{CANONICAL_PATH}}" />
+</head>
+<body>{{BODY}}</body>
+</html>
+`;
+
+const SAMPLE_SCORECARD = {
+  schema_version: '0.5',
+  tool: { name: 'ripgrep', binary: 'rg', version: '14.1.0' },
+  target: { kind: 'command', command: 'rg' },
+  badge: { score_pct: 92, eligible: true },
+  audience: 'agent-optimized',
+  audit_profile: null,
+  results: [
+    { status: 'pass', label: 'has --help', group: 'P3', evidence: 'OK' },
+    {
+      status: 'fail',
+      label: 'exits 0 on missing flag',
+      group: 'P4',
+      evidence: 'expected non-zero exit, got 0',
+    },
+    { status: 'warn', label: 'subcommands listed', group: 'P6', evidence: 'missing groups' },
+    { status: 'pass', label: 'streams stdout', group: 'P1', evidence: 'OK' },
+  ],
+};
+
+const CACHED_RIPGREP_KEY = 'scores/ripgrep/0.4.0.json';
+const CACHED_RIPGREP_PAYLOAD = {
+  spec_version: '0.4.0',
+  anc_version: '0.3.1',
+  tool_version: '14.1.0',
+  scorecard: SAMPLE_SCORECARD,
+};
+
+function makeEnv(content: Record<string, unknown> = {}) {
+  const store = new Map<string, string>();
+  for (const [k, v] of Object.entries(content)) {
+    store.set(k, typeof v === 'string' ? v : JSON.stringify(v));
+  }
+  const env = {
+    ASSETS: {
+      async fetch(req: Request | string) {
+        const url = typeof req === 'string' ? req : req.url;
+        const path = new URL(url).pathname;
+        if (path === '/_internal/score-live-shell.html') {
+          return new Response(SHELL_TEMPLATE, { status: 200 });
+        }
+        return new Response('not found', { status: 404 });
+      },
+    },
+    SCORE_CACHE: {
+      async get(key: string) {
+        const raw = store.get(key);
+        if (raw === undefined) return null;
+        return {
+          async json() {
+            return JSON.parse(raw);
+          },
+          async text() {
+            return raw;
+          },
+        };
+      },
+      async put(key: string, value: unknown) {
+        store.set(key, typeof value === 'string' ? value : String(value));
+      },
+      async delete(key: string) {
+        store.delete(key);
+      },
+    },
+  };
+  // Reset the module-level template cache so each test re-fetches.
+  _resetShellTemplateCache();
+  return env as unknown as { ASSETS: Fetcher; SCORE_CACHE: R2Bucket };
+}
+
+function get(path: string): Request {
+  return new Request(`https://anc.dev${path}`, { method: 'GET' });
+}
+
+describe('parseLiveScorePath', () => {
+  test('accepts /score/live/<binary> with lowercase alphanumeric + hyphen', () => {
+    expect(parseLiveScorePath('/score/live/ripgrep')).toBe('ripgrep');
+    expect(parseLiveScorePath('/score/live/ast-grep')).toBe('ast-grep');
+    expect(parseLiveScorePath('/score/live/btm')).toBe('btm');
+    expect(parseLiveScorePath('/score/live/aider2')).toBe('aider2');
+  });
+
+  test('rejects uppercase, dots (non-.md), slashes (path traversal guard)', () => {
+    expect(parseLiveScorePath('/score/live/RipGrep')).toBeNull();
+    expect(parseLiveScorePath('/score/live/ripgrep.json')).toBeNull();
+    expect(parseLiveScorePath('/score/live/ripgrep.html')).toBeNull();
+    expect(parseLiveScorePath('/score/live/../etc/passwd')).toBeNull();
+    expect(parseLiveScorePath('/score/live/foo/bar')).toBeNull();
+    expect(parseLiveScorePath('/score/live/foo bar')).toBeNull();
+  });
+
+  test('accepts .md suffix and reports isMarkdown', () => {
+    expect(parseLiveScorePathMatch('/score/live/ripgrep')).toEqual({ binary: 'ripgrep', isMarkdown: false });
+    expect(parseLiveScorePathMatch('/score/live/ripgrep.md')).toEqual({ binary: 'ripgrep', isMarkdown: true });
+    expect(parseLiveScorePathMatch('/score/live/ast-grep.md')).toEqual({ binary: 'ast-grep', isMarkdown: true });
+  });
+
+  test('rejects malformed .md paths', () => {
+    expect(parseLiveScorePathMatch('/score/live/.md')).toBeNull();
+    expect(parseLiveScorePathMatch('/score/live/ripgrep.md.md')).toBeNull();
+    expect(parseLiveScorePathMatch('/score/live/ripgrep.MD')).toBeNull();
+    expect(parseLiveScorePathMatch('/score/live/../etc.md')).toBeNull();
+  });
+
+  test('rejects leading hyphen + over-long slugs', () => {
+    expect(parseLiveScorePath('/score/live/-ripgrep')).toBeNull();
+    expect(parseLiveScorePath(`/score/live/${'a'.repeat(65)}`)).toBeNull();
+  });
+
+  test('rejects empty + bare prefix paths', () => {
+    expect(parseLiveScorePath('/score/live/')).toBeNull();
+    expect(parseLiveScorePath('/live-score')).toBeNull();
+    expect(parseLiveScorePath('/livescore/ripgrep')).toBeNull();
+  });
+
+  test('rejects /api/score and /score (curated) namespaces', () => {
+    expect(parseLiveScorePath('/api/score/ripgrep')).toBeNull();
+    expect(parseLiveScorePath('/score/ripgrep')).toBeNull();
+  });
+});
+
+describe('handleLiveScorePage — happy path', () => {
+  test('returns 200 HTML with rendered scorecard summary', async () => {
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: CACHED_RIPGREP_PAYLOAD });
+    const res = await handleLiveScorePage(get('/score/live/ripgrep'), env);
+    expect(res.status).toBe(200);
+    expect(res.headers.get('content-type')).toContain('text/html');
+    expect(res.headers.get('x-robots-tag')).toBe('noindex');
+    const html = await res.text();
+    expect(html).toContain('<title>ripgrep');
+    expect(html).toContain('92%');
+    expect(html).toContain('14.1.0');
+    expect(html).toContain('0.3.1'); // anc version
+    expect(html).toContain('exits 0 on missing flag'); // top issue
+    expect(html).toContain('subcommands listed'); // top issue
+    expect(html).toContain('href="/install"'); // canonical install link (dedup with content/install.md)
+    expect(html).toContain('https://anc.dev/score/live/ripgrep'); // canonical
+  });
+
+  test('top-issues block surfaces FAIL before WARN', async () => {
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: CACHED_RIPGREP_PAYLOAD });
+    const res = await handleLiveScorePage(get('/score/live/ripgrep'), env);
+    const html = await res.text();
+    const failIdx = html.indexOf('exits 0 on missing flag');
+    const warnIdx = html.indexOf('subcommands listed');
+    expect(failIdx).toBeGreaterThan(-1);
+    expect(warnIdx).toBeGreaterThan(-1);
+    expect(failIdx).toBeLessThan(warnIdx);
+  });
+
+  test('omits per-tool check table and meta sections (summary-only)', async () => {
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: CACHED_RIPGREP_PAYLOAD });
+    const res = await handleLiveScorePage(get('/score/live/ripgrep'), env);
+    const html = await res.text();
+    expect(html).not.toContain('scorecard-checks');
+    expect(html).not.toContain('scorecard-meta');
+    expect(html).not.toContain('All Checks');
+  });
+
+  test('renders cached freshness marker', async () => {
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: CACHED_RIPGREP_PAYLOAD });
+    const res = await handleLiveScorePage(get('/score/live/ripgrep'), env);
+    const html = await res.text();
+    expect(html).toContain('cached');
+  });
+
+  test('clean scorecard shows "no failing or warning checks"', async () => {
+    const cleanPayload = {
+      ...CACHED_RIPGREP_PAYLOAD,
+      scorecard: {
+        ...SAMPLE_SCORECARD,
+        results: [{ status: 'pass', label: 'all good', group: 'P1', evidence: 'OK' }],
+      },
+    };
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: cleanPayload });
+    const res = await handleLiveScorePage(get('/score/live/ripgrep'), env);
+    const html = await res.text();
+    expect(html).toContain('No failing or warning checks');
+  });
+});
+
+describe('handleLiveScorePage — 404 + edge cases', () => {
+  test('returns 404 HTML for missing cache entry', async () => {
+    const env = makeEnv();
+    const res = await handleLiveScorePage(get('/score/live/ripgrep'), env);
+    expect(res.status).toBe(404);
+    expect(res.headers.get('content-type')).toContain('text/html');
+    const html = await res.text();
+    expect(html).toContain('No live score for');
+    expect(html).toContain('ripgrep');
+    expect(html).toContain('Score it now');
+  });
+
+  test('returns 404 for slug shape violation (path traversal)', async () => {
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: CACHED_RIPGREP_PAYLOAD });
+    const res = await handleLiveScorePage(get('/score/live/../etc'), env);
+    expect(res.status).toBe(404);
+  });
+
+  test('405 for non-GET/HEAD methods', async () => {
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: CACHED_RIPGREP_PAYLOAD });
+    for (const method of ['POST', 'PUT', 'DELETE', 'PATCH'] as const) {
+      const res = await handleLiveScorePage(new Request('https://anc.dev/score/live/ripgrep', { method }), env);
+      expect(res.status).toBe(405);
+    }
+  });
+
+  test('HEAD returns 200 + body (cheap; matches GET semantics)', async () => {
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: CACHED_RIPGREP_PAYLOAD });
+    const res = await handleLiveScorePage(new Request('https://anc.dev/score/live/ripgrep', { method: 'HEAD' }), env);
+    expect(res.status).toBe(200);
+  });
+
+  test('500 + plain-text when shell template asset missing (defense in depth)', async () => {
+    const store = new Map<string, string>();
+    store.set(CACHED_RIPGREP_KEY, JSON.stringify(CACHED_RIPGREP_PAYLOAD));
+    const env = {
+      ASSETS: {
+        async fetch() {
+          return new Response('not found', { status: 404 });
+        },
+      },
+      SCORE_CACHE: {
+        async get(key: string) {
+          const raw = store.get(key);
+          if (raw === undefined) return null;
+          return {
+            async json() {
+              return JSON.parse(raw);
+            },
+            async text() {
+              return raw;
+            },
+          };
+        },
+        async put() {},
+        async delete() {},
+      },
+    } as unknown as { ASSETS: Fetcher; SCORE_CACHE: R2Bucket };
+    _resetShellTemplateCache();
+    const res = await handleLiveScorePage(get('/score/live/ripgrep'), env);
+    expect(res.status).toBe(500);
+    expect(res.headers.get('content-type')).toContain('text/plain');
+  });
+
+  test('returns 404 + does not expose raw user input as HTML in error page', async () => {
+    // Red-team: send a slug that bypasses parseLiveScorePath (it shouldn't),
+    // but if it did, ensure the 404 page escapes the binary name. Since
+    // parseLiveScorePath rejects anything outside [a-z0-9-], a clean slug
+    // is still escaped by the renderer. Cover that path explicitly.
+    const env = makeEnv();
+    const res = await handleLiveScorePage(get('/score/live/foo-bar'), env);
+    expect(res.status).toBe(404);
+    const html = await res.text();
+    expect(html).toContain('No live score for');
+    // The 404 path uses esc() — confirm by sending a slug with a hyphen.
+    expect(html).toContain('foo-bar');
+  });
+});
+
+describe('handleLiveScorePage — markdown twin', () => {
+  test('GET /score/live/<binary>.md returns text/markdown with summary', async () => {
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: CACHED_RIPGREP_PAYLOAD });
+    const res = await handleLiveScorePage(get('/score/live/ripgrep.md'), env);
+    expect(res.status).toBe(200);
+    expect(res.headers.get('content-type')).toContain('text/markdown');
+    const md = await res.text();
+    expect(md).toContain('# ripgrep');
+    expect(md).toContain('**Score:** 92% pass rate');
+    expect(md).toContain('## Top issues');
+    expect(md).toContain('| FAIL | exits 0 on missing flag |');
+    expect(md).toContain('https://anc.dev/p4'); // absolute principle link
+    expect(md).not.toContain('<'); // no HTML tags in markdown twin
+  });
+
+  test('Accept: text/markdown on /score/live/<binary> returns markdown', async () => {
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: CACHED_RIPGREP_PAYLOAD });
+    const req = new Request('https://anc.dev/score/live/ripgrep', {
+      method: 'GET',
+      headers: { accept: 'text/markdown' },
+    });
+    const res = await handleLiveScorePage(req, env);
+    expect(res.status).toBe(200);
+    expect(res.headers.get('content-type')).toContain('text/markdown');
+    const md = await res.text();
+    expect(md).toContain('# ripgrep');
+  });
+
+  test('Accept: text/html on /score/live/<binary> returns HTML (default)', async () => {
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: CACHED_RIPGREP_PAYLOAD });
+    const req = new Request('https://anc.dev/score/live/ripgrep', {
+      method: 'GET',
+      headers: { accept: 'text/html' },
+    });
+    const res = await handleLiveScorePage(req, env);
+    expect(res.headers.get('content-type')).toContain('text/html');
+  });
+
+  test('404 markdown response for missing cache entry', async () => {
+    const env = makeEnv();
+    const res = await handleLiveScorePage(get('/score/live/ripgrep.md'), env);
+    expect(res.status).toBe(404);
+    expect(res.headers.get('content-type')).toContain('text/markdown');
+    const md = await res.text();
+    expect(md).toContain('# No live score for `ripgrep` yet');
+    expect(md).toContain('homepage');
+  });
+
+  test('markdown escapes pipe characters in evidence to preserve table shape', async () => {
+    const pipeXssPayload = {
+      ...CACHED_RIPGREP_PAYLOAD,
+      scorecard: {
+        ...SAMPLE_SCORECARD,
+        results: [
+          {
+            status: 'fail',
+            label: 'pipeline check',
+            group: 'P3',
+            evidence: 'cmd | grep foo | head -1',
+          },
+        ],
+      },
+    };
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: pipeXssPayload });
+    const res = await handleLiveScorePage(get('/score/live/ripgrep.md'), env);
+    const md = await res.text();
+    expect(md).toContain('cmd \\| grep foo \\| head -1');
+  });
+
+  test('Accept q-weighted header picks markdown when text/markdown wins', async () => {
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: CACHED_RIPGREP_PAYLOAD });
+    const req = new Request('https://anc.dev/score/live/ripgrep', {
+      method: 'GET',
+      headers: { accept: 'text/html;q=0.1, text/markdown;q=0.9' },
+    });
+    const res = await handleLiveScorePage(req, env);
+    expect(res.headers.get('content-type')).toContain('text/markdown');
+  });
+});
+
+describe('handleLiveScorePage — HTML escape sanity', () => {
+  test('escapes scorecard.results.evidence to prevent HTML injection', async () => {
+    const xssPayload = {
+      ...CACHED_RIPGREP_PAYLOAD,
+      scorecard: {
+        ...SAMPLE_SCORECARD,
+        results: [
+          {
+            status: 'fail',
+            label: '<script>alert(1)</script>',
+            group: 'P1',
+            evidence: '<img src=x onerror=alert(2)>',
+          },
+        ],
+      },
+    };
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: xssPayload });
+    const res = await handleLiveScorePage(get('/score/live/ripgrep'), env);
+    const html = await res.text();
+    // Neither <script> nor <img onerror> should appear raw — they must
+    // be entity-escaped before reaching the response body.
+    expect(html).not.toContain('<script>alert(1)</script>');
+    expect(html).not.toContain('<img src=x onerror=alert(2)>');
+    expect(html).toContain('&lt;script&gt;');
+    expect(html).toContain('&lt;img');
+  });
+
+  test('escapes tool.name and binary fields', async () => {
+    const xssPayload = {
+      ...CACHED_RIPGREP_PAYLOAD,
+      scorecard: { ...SAMPLE_SCORECARD, tool: { name: '<svg/onload=alert(3)>', binary: 'rg' } },
+    };
+    const env = makeEnv({ [CACHED_RIPGREP_KEY]: xssPayload });
+    const res = await handleLiveScorePage(get('/score/live/ripgrep'), env);
+    const html = await res.text();
+    expect(html).not.toContain('<svg/onload=alert(3)>');
+    expect(html).toContain('&lt;svg');
+  });
+});
diff --git a/tests/score-parse-install.test.ts b/tests/score-parse-install.test.ts
index befd3a1..a8b30a3 100644
--- a/tests/score-parse-install.test.ts
+++ b/tests/score-parse-install.test.ts
@@ -51,10 +51,10 @@ describe('parseInstallCommand — happy paths from plan U4 table', () => {
     });
   });
 
-  test('uv tool install <pkg> normalizes to pip', () => {
+  test('uv tool install <pkg> resolves to pm=uv (split from pip in U6 rework)', () => {
     expect(parseInstallCommand('uv tool install black')).toEqual({
       ok: true,
-      value: { pm: 'pip', package: 'black', binary: 'black' },
+      value: { pm: 'uv', package: 'black', binary: 'black' },
     });
   });
 
diff --git a/tests/score-registry-lookup.test.ts b/tests/score-registry-lookup.test.ts
index f543120..9f8f2b8 100644
--- a/tests/score-registry-lookup.test.ts
+++ b/tests/score-registry-lookup.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, test } from 'bun:test';
 import type { DiscoveryHintsIndex, RegistryIndex } from '../src/worker/score/registry-lookup';
-import { lookupRegistry } from '../src/worker/score/registry-lookup';
+import { deriveShareBinary, lookupRegistry } from '../src/worker/score/registry-lookup';
 import type { ValidatedInput } from '../src/worker/score/validate';
 
 const REGISTRY: RegistryIndex = {
@@ -70,10 +70,40 @@ describe('lookupRegistry', () => {
     expect(lookupRegistry(input, REGISTRY, HINTS).kind).toBe('miss');
   });
 
-  test('install-command input → miss (caller passes spec through directly)', () => {
+  test('install-command with curated binary → registry hit (cross-check by spec.binary)', () => {
+    // `cargo install ripgrep` parses to binary='ripgrep'. The curated
+    // by_slug map has ripgrep, so this should hit registry, not fall
+    // through to the cache + live path. Catches the bat-shaped class of
+    // install-command-resolving-to-curated-tool inputs that previously
+    // paid sandbox cost for a tool already audited.
     const input: ValidatedInput = {
       kind: 'install-command',
-      spec: { pm: 'brew', package: 'ripgrep', binary: 'ripgrep' },
+      spec: { pm: 'cargo-binstall', package: 'ripgrep', binary: 'ripgrep' },
+    };
+    const r = lookupRegistry(input, REGISTRY, HINTS);
+    expect(r.kind).toBe('registry');
+    if (r.kind === 'registry') {
+      expect(r.entry.name).toBe('ripgrep');
+      expect(r.entry.binary).toBe('rg'); // curated entry's actual binary, not the parser's binary
+    }
+  });
+
+  test('install-command with non-curated binary → miss (live path)', () => {
+    const input: ValidatedInput = {
+      kind: 'install-command',
+      spec: { pm: 'brew', package: 'obscure-tool', binary: 'obscure-tool' },
+    };
+    expect(lookupRegistry(input, REGISTRY, HINTS).kind).toBe('miss');
+  });
+
+  test('install-command binary-alias edge case (cargo install <binary-not-package>) → miss', () => {
+    // Typing `cargo install rg` (the binary name, not the cargo package
+    // name 'ripgrep') makes the parser report binary='rg'. by_slug has
+    // 'ripgrep' but not 'rg' (rg is curated under tool.binary, not
+    // tool.name). Documented edge case — falls through to live path.
+    const input: ValidatedInput = {
+      kind: 'install-command',
+      spec: { pm: 'cargo-binstall', package: 'rg', binary: 'rg' },
     };
     expect(lookupRegistry(input, REGISTRY, HINTS).kind).toBe('miss');
   });
@@ -100,3 +130,28 @@ describe('lookupRegistry', () => {
     expect(r.kind).toBe('registry');
   });
 });
+
+describe('deriveShareBinary — branch-aware', () => {
+  test('github-url WITHOUT branch + matching hint → binary derived from hint', () => {
+    const input: ValidatedInput = { kind: 'github-url', owner: 'Aider-AI', repo: 'aider' };
+    expect(deriveShareBinary(input, HINTS)).toBe('aider');
+  });
+
+  test('github-url WITH branch returns null (branch-scoped scores are one-off, no share URL)', () => {
+    // /score/live/<binary> is keyed by binary alone. Returning a share
+    // URL for a branch-scoped score would clobber the default-branch
+    // scorecard at the same key on subsequent lookups. The branch
+    // request returns inline; the user keeps the scorecard, can't
+    // bookmark a branch-scoped URL today.
+    const input: ValidatedInput = { kind: 'github-url', owner: 'Aider-AI', repo: 'aider', branch: 'main' };
+    expect(deriveShareBinary(input, HINTS)).toBeNull();
+  });
+
+  test('install-command kind passes through unchanged (no branch concept)', () => {
+    const input: ValidatedInput = {
+      kind: 'install-command',
+      spec: { pm: 'pip', package: 'black', binary: 'black' },
+    };
+    expect(deriveShareBinary(input, HINTS)).toBe('black');
+  });
+});
diff --git a/tests/score-response-shape.test.ts b/tests/score-response-shape.test.ts
new file mode 100644
index 0000000..c22be53
--- /dev/null
+++ b/tests/score-response-shape.test.ts
@@ -0,0 +1,142 @@
+// /api/score response-shape contract tests.
+//
+// Plan U5 — every variant of the ScoreError discriminated union must:
+//   1. Map to the documented HTTP status (statusForError).
+//   2. Carry the R11 triad (spec_version + checker_url) on the wire.
+//   3. Honor Retry-After when the variant declares retry_after (rate_limited
+//      and scoring_disabled).
+//
+// Triad enforcement: shapeScoreSuccess refuses to emit a partial response
+// (missing anc_version → 500 with `incomplete_response_contract`). The
+// exhaustiveness check via assertNever() in statusForError() is exercised
+// here by enumerating every variant — adding a new variant without
+// extending statusForError() makes this file fail to compile.
+
+import { describe, expect, test } from 'bun:test';
+import {
+  type ScoreError,
+  shapeScoreError,
+  shapeScoreSuccess,
+  statusForError,
+} from '../src/worker/score/response-shape';
+import { CHECKER_URL, SPEC_VERSION } from '../src/worker/spec-version.gen';
+
+// One representative of every ScoreError variant — exhaustiveness here is
+// what gives us coverage of the assertNever() guard inside statusForError.
+const ALL_ERRORS: readonly ScoreError[] = [
+  { code: 'invalid_url', details: 'not a url', cta_text: '...' },
+  { code: 'non_https_url', cta_text: '...' },
+  { code: 'non_github_host', cta_text: '...' },
+  { code: 'invalid_url_path', cta_text: '...' },
+  { code: 'unrecognized_input', cta_text: '...' },
+  { code: 'unparseable_install_command', details: 'foo', cta_text: '...' },
+  { code: 'chain_no_resolve', cta_text: '...' },
+  { code: 'discovery_redirect_loop', cta_text: '...' },
+  { code: 'rate_limited', retry_after: 42, cta_text: '...' },
+  { code: 'install_unsupported', pm: 'brew', cta_text: '...' },
+  { code: 'chain_resolved_install_failed', details: 'apt', cta_text: '...' },
+  { code: 'chain_resolved_no_binary_produced', details: 'empty', cta_text: '...' },
+  { code: 'timeout', phase: 'install', cta_text: '...' },
+  { code: 'turnstile_failed', cta_text: '...' },
+  { code: 'scoring_disabled', cta_text: '...' },
+  { code: 'sandbox_stub_until_u6', cta_text: '...' },
+  { code: 'incomplete_response_contract', details: 'no anc', cta_text: '...' },
+  { code: 'service_misconfigured', details: 'missing secret', cta_text: '...' },
+];
+
+describe('statusForError — HTTP status mapping per variant', () => {
+  const cases: Array<[ScoreError['code'], number]> = [
+    ['invalid_url', 400],
+    ['non_https_url', 400],
+    ['non_github_host', 400],
+    ['invalid_url_path', 400],
+    ['unrecognized_input', 400],
+    ['unparseable_install_command', 400],
+    ['turnstile_failed', 400],
+    ['chain_no_resolve', 404],
+    ['rate_limited', 429],
+    ['install_unsupported', 502],
+    ['chain_resolved_install_failed', 502],
+    ['chain_resolved_no_binary_produced', 502],
+    ['discovery_redirect_loop', 502],
+    ['timeout', 504],
+    ['scoring_disabled', 503],
+    ['sandbox_stub_until_u6', 503],
+    ['incomplete_response_contract', 500],
+    ['service_misconfigured', 500],
+  ];
+  for (const [code, want] of cases) {
+    test(`${code} → ${want}`, () => {
+      const err = ALL_ERRORS.find((e) => e.code === code);
+      expect(err).toBeDefined();
+      if (!err) return;
+      expect(statusForError(err)).toBe(want);
+    });
+  }
+});
+
+describe('shapeScoreError — wire shape + headers', () => {
+  test('every variant carries spec_version + checker_url', async () => {
+    for (const e of ALL_ERRORS) {
+      const res = shapeScoreError(e);
+      const body = (await res.json()) as Record<string, unknown>;
+      expect(body.spec_version).toBe(SPEC_VERSION);
+      expect(body.checker_url).toBe(CHECKER_URL);
+      expect((body.error as { code: string }).code).toBe(e.code);
+    }
+  });
+
+  test('rate_limited carries Retry-After matching retry_after', () => {
+    const res = shapeScoreError({ code: 'rate_limited', retry_after: 17, cta_text: '...' });
+    expect(res.status).toBe(429);
+    expect(res.headers.get('Retry-After')).toBe('17');
+  });
+
+  test('scoring_disabled carries Retry-After: 3600', () => {
+    const res = shapeScoreError({ code: 'scoring_disabled', cta_text: '...' });
+    expect(res.status).toBe(503);
+    expect(res.headers.get('Retry-After')).toBe('3600');
+  });
+
+  test('live JSON sets Cache-Control: no-store + CORS *', () => {
+    const res = shapeScoreError({ code: 'unrecognized_input', cta_text: '...' });
+    expect(res.headers.get('Cache-Control')).toBe('no-store');
+    expect(res.headers.get('Access-Control-Allow-Origin')).toBe('*');
+    expect(res.headers.get('X-Robots-Tag')).toBe('noindex');
+    expect(res.headers.get('Content-Type')).toBe('application/json; charset=utf-8');
+  });
+
+  test('cache-hit freshness sets Cache-Control: public, max-age=300', () => {
+    const res = shapeScoreError({ code: 'unrecognized_input', cta_text: '...' }, 'cache-hit');
+    expect(res.headers.get('Cache-Control')).toBe('public, max-age=300');
+  });
+});
+
+describe('shapeScoreSuccess — R11 triad enforcement', () => {
+  test('happy path: scorecard + anc_version → 200 with triad', async () => {
+    const res = shapeScoreSuccess({ name: 'ripgrep' }, '0.3.0', 'live');
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as Record<string, unknown>;
+    expect(body.spec_version).toBe(SPEC_VERSION);
+    expect(body.anc_version).toBe('0.3.0');
+    expect(body.checker_url).toBe(CHECKER_URL);
+    expect(body.scorecard).toEqual({ name: 'ripgrep' });
+  });
+
+  test('missing anc_version → 500 incomplete_response_contract (never a quiet partial)', async () => {
+    const res = shapeScoreSuccess({ name: 'ripgrep' }, null, 'live');
+    expect(res.status).toBe(500);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('incomplete_response_contract');
+  });
+
+  test('cache-hit freshness uses cached cache-control', () => {
+    const res = shapeScoreSuccess({}, '0.3.0', 'cache-hit');
+    expect(res.headers.get('Cache-Control')).toBe('public, max-age=300');
+  });
+
+  test('live freshness uses no-store', () => {
+    const res = shapeScoreSuccess({}, '0.3.0', 'live');
+    expect(res.headers.get('Cache-Control')).toBe('no-store');
+  });
+});
diff --git a/tests/score-sdist-allowlist.test.ts b/tests/score-sdist-allowlist.test.ts
new file mode 100644
index 0000000..e929c9b
--- /dev/null
+++ b/tests/score-sdist-allowlist.test.ts
@@ -0,0 +1,167 @@
+// sdist-allowlist invariants (plan U7 follow-up, option C).
+//
+// The allowlist is a security-relevant data file: each entry loosens
+// `--only-binary=:all:` for one package, letting pip fall back to sdist
+// (which runs setup.py at install time). The shape + integrity checks
+// here ensure entries can't quietly drift into invalid states (typo'd
+// names, missing evidence, version-range gaps).
+
+import { describe, expect, test } from 'bun:test';
+import {
+  SDIST_REJECTED_NOTES,
+  SDIST_TRUSTED_DEPS,
+  SDIST_TRUSTED_NAMES,
+  type SdistTrustedEntry,
+} from '../src/worker/score/sdist-allowlist';
+
+describe('SDIST_TRUSTED_DEPS — entry shape integrity', () => {
+  test('every entry has a non-empty PyPI name', () => {
+    for (const e of SDIST_TRUSTED_DEPS) {
+      expect(e.name.length).toBeGreaterThan(0);
+      // PyPI names are lowercase letters, digits, hyphens, dots, underscores.
+      expect(e.name).toMatch(/^[a-z0-9._-]+$/);
+    }
+  });
+
+  test('every entry carries a non-trivial reason (>=80 chars to discourage one-liners)', () => {
+    for (const e of SDIST_TRUSTED_DEPS) {
+      expect({ name: e.name, reasonLen: e.reason.length }).toEqual({
+        name: e.name,
+        reasonLen: expect.any(Number),
+      });
+      expect(e.reason.length).toBeGreaterThanOrEqual(80);
+    }
+  });
+
+  test('every entry carries at least one evidence URL', () => {
+    for (const e of SDIST_TRUSTED_DEPS) {
+      expect(e.evidence.length).toBeGreaterThanOrEqual(1);
+      for (const url of e.evidence) {
+        expect(url).toMatch(/^https:\/\//);
+      }
+    }
+  });
+
+  test('every entry carries a YYYY-MM-DD added date', () => {
+    for (const e of SDIST_TRUSTED_DEPS) {
+      expect(e.added).toMatch(/^\d{4}-\d{2}-\d{2}$/);
+    }
+  });
+
+  test('no duplicate entries', () => {
+    const names = SDIST_TRUSTED_DEPS.map((e) => e.name);
+    const unique = new Set(names);
+    expect(unique.size).toBe(names.length);
+  });
+});
+
+describe('SDIST_TRUSTED_DEPS — version range fields', () => {
+  // affected_min/max + safe_pin are advisory but should be internally
+  // consistent: if max_affected is set, it should be lower than the
+  // safe_pin (the pin is a recommendation OUT of the affected range).
+
+  test('affected_min_version, when set, is a valid semver-ish string', () => {
+    for (const e of SDIST_TRUSTED_DEPS) {
+      if (e.affected_min_version !== undefined) {
+        // Loose semver: digits and dots, optionally with a prerelease tag.
+        expect(e.affected_min_version).toMatch(/^\d+(\.\d+)*(\.[A-Za-z0-9._-]+)?$/);
+      }
+    }
+  });
+
+  test('affected_max_version, when set, is a valid semver-ish string', () => {
+    for (const e of SDIST_TRUSTED_DEPS) {
+      if (e.affected_max_version !== undefined) {
+        expect(e.affected_max_version).toMatch(/^\d+(\.\d+)*(\.[A-Za-z0-9._-]+)?$/);
+      }
+    }
+  });
+
+  test('safe_pin, when set, is a recognizable pip version specifier (>=, ==, ~=, etc.)', () => {
+    for (const e of SDIST_TRUSTED_DEPS) {
+      if (e.safe_pin !== undefined) {
+        expect(e.safe_pin).toMatch(/^(>=|<=|==|~=|>|<|!=)?\d/);
+      }
+    }
+  });
+});
+
+describe('SDIST_TRUSTED_NAMES — derived flag value', () => {
+  test('SDIST_TRUSTED_NAMES is a comma-joined list of every trusted entry name', () => {
+    const expected = SDIST_TRUSTED_DEPS.map((e) => e.name).join(',');
+    expect(SDIST_TRUSTED_NAMES).toBe(expected);
+  });
+
+  test('SDIST_TRUSTED_NAMES contains no spaces (must be safe for --no-binary=<csv> flag)', () => {
+    expect(SDIST_TRUSTED_NAMES).not.toMatch(/\s/);
+  });
+
+  test('SDIST_TRUSTED_NAMES current expected composition: pyperclip + pycparser', () => {
+    // Pinning to surface any future addition/removal as a deliberate
+    // PR-reviewable change. If the allowlist changes, update both the
+    // file AND this expectation.
+    expect(SDIST_TRUSTED_NAMES).toBe('pyperclip,pycparser');
+  });
+});
+
+describe('SDIST_REJECTED_NOTES — entry shape integrity', () => {
+  test('every entry has a name, reason, investigated date, and version range', () => {
+    for (const e of SDIST_REJECTED_NOTES) {
+      expect(e.name.length).toBeGreaterThan(0);
+      expect(e.reason.length).toBeGreaterThanOrEqual(80);
+      expect(e.investigated).toMatch(/^\d{4}-\d{2}-\d{2}$/);
+    }
+  });
+
+  test('every rejected entry has an explicit affected version range', () => {
+    // The whole point of rejecting an entry is documenting WHEN it
+    // applies. A rejected entry without a version range is ambiguous:
+    // future me reading "don't add numpy" needs to know it's about a
+    // specific version range, not all numpy forever.
+    for (const e of SDIST_REJECTED_NOTES) {
+      expect({ name: e.name, hasMin: e.affected_min_version !== undefined }).toEqual({
+        name: e.name,
+        hasMin: true,
+      });
+      expect({ name: e.name, hasMax: e.affected_max_version !== undefined }).toEqual({
+        name: e.name,
+        hasMax: true,
+      });
+    }
+  });
+
+  test('every rejected entry suggests a safe_pin alternative', () => {
+    // Rejection means "this isn't fixed by allowlisting"; downstream
+    // consumers still need a path forward. safe_pin documents the right
+    // recommendation (usually "pin to a newer version that ships wheels").
+    for (const e of SDIST_REJECTED_NOTES) {
+      expect({ name: e.name, hasPin: e.safe_pin !== undefined && e.safe_pin.length > 0 }).toEqual({
+        name: e.name,
+        hasPin: true,
+      });
+    }
+  });
+});
+
+describe('SDIST_TRUSTED_DEPS vs SDIST_REJECTED_NOTES — no overlap', () => {
+  test('no package appears on both lists', () => {
+    const trustedNames = new Set(SDIST_TRUSTED_DEPS.map((e) => e.name));
+    for (const r of SDIST_REJECTED_NOTES) {
+      expect({ name: r.name, onTrustedList: trustedNames.has(r.name) }).toEqual({
+        name: r.name,
+        onTrustedList: false,
+      });
+    }
+  });
+});
+
+// Type-level smoke check: ensures the exported type stays usable from
+// the consumer side. If anyone tightens SdistTrustedEntry in a way that
+// breaks the existing entries, this fails at type-check time.
+const _typeCheck: SdistTrustedEntry = {
+  name: 'example',
+  reason: 'x'.repeat(80),
+  added: '2026-05-19',
+  evidence: ['https://example.com'],
+};
+void _typeCheck;
diff --git a/tests/score-telemetry.test.ts b/tests/score-telemetry.test.ts
new file mode 100644
index 0000000..ae0f205
--- /dev/null
+++ b/tests/score-telemetry.test.ts
@@ -0,0 +1,201 @@
+// AE telemetry regression suite (plan U10).
+//
+// Pins the writeDataPoint field-shape contract and the per-tier
+// emission discipline so a future refactor that reorders blobs /
+// drops a blob / skips emission on a bounce class fails LOCALLY,
+// before it silently breaks every saved AE SQL query in
+// docs/runbooks/live-scoring-analytics.md.
+//
+// Tests reuse the makeEnv / postScore / getScore helpers exported
+// from tests/score-handler.test.ts so a regression in the handler's
+// fixture wiring surfaces in one place rather than two.
+
+import { beforeEach, describe, expect, test } from 'bun:test';
+import { _resetIndexCache, handleScore } from '../src/worker/score/handler';
+import { _resetKillSwitchCache } from '../src/worker/score/kill-switch';
+import { getScore, makeEnv, postScore, type TelemetryEvent } from './score-handler.test';
+
+beforeEach(() => {
+  _resetIndexCache();
+  _resetKillSwitchCache();
+});
+
+// Canonical slot positions — single source of truth for the regression
+// test. If the helper or the runbook needs to move, this object is the
+// one place to update.
+const SLOT = {
+  BLOB_INPUT_KIND: 0,
+  BLOB_PM: 1,
+  BLOB_ERROR_CODE: 2,
+  BLOB_FRESHNESS: 3,
+  BLOB_RESOLVED_STEP: 4,
+  DOUBLE_TOTAL_MS: 0,
+  DOUBLE_INSTALL_MS: 1,
+  DOUBLE_ANC_CHECK_MS: 2,
+  DOUBLE_STATUS: 3,
+} as const;
+
+function lastEvent(events: TelemetryEvent[]): TelemetryEvent {
+  expect(events.length).toBeGreaterThan(0);
+  return events[events.length - 1];
+}
+
+// ---------------------------------------------------------------------------
+// Field-shape regression — pins blob/double/index slot assignments
+// ---------------------------------------------------------------------------
+
+describe('AE telemetry — field-shape contract', () => {
+  test('every event carries blobs.length=5 and doubles.length=4', async () => {
+    const events: TelemetryEvent[] = [];
+    await handleScore(getScore('ripgrep'), makeEnv({ telemetryEvents: events }));
+    const evt = lastEvent(events);
+    expect(evt.blobs?.length).toBe(5);
+    expect(evt.doubles?.length).toBe(4);
+  });
+
+  test('curated hit emits blob1=registry, blob4=registry-hit, blob5=registry, index1=tool', async () => {
+    const events: TelemetryEvent[] = [];
+    await handleScore(getScore('ripgrep'), makeEnv({ telemetryEvents: events }));
+    const evt = lastEvent(events);
+    expect(evt.blobs?.[SLOT.BLOB_INPUT_KIND]).toBe('registry');
+    expect(evt.blobs?.[SLOT.BLOB_FRESHNESS]).toBe('registry-hit');
+    expect(evt.blobs?.[SLOT.BLOB_RESOLVED_STEP]).toBe('registry');
+    // Curated registry hits don't go through resolveSpec, so blob2 pm is null.
+    expect(evt.blobs?.[SLOT.BLOB_PM]).toBe(null);
+    expect(evt.blobs?.[SLOT.BLOB_ERROR_CODE]).toBe(null);
+    // index1 carries the tool name on success paths with a known binary.
+    expect(evt.indexes).toEqual(['rg']);
+  });
+
+  test('doubles capture status + total_ms; install/anc null on curated hit', async () => {
+    const events: TelemetryEvent[] = [];
+    await handleScore(getScore('ripgrep'), makeEnv({ telemetryEvents: events }));
+    const evt = lastEvent(events);
+    expect(typeof evt.doubles?.[SLOT.DOUBLE_TOTAL_MS]).toBe('number');
+    expect(evt.doubles?.[SLOT.DOUBLE_INSTALL_MS]).toBe(null);
+    expect(evt.doubles?.[SLOT.DOUBLE_ANC_CHECK_MS]).toBe(null);
+    expect(evt.doubles?.[SLOT.DOUBLE_STATUS]).toBe(200);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Per-tier emission discipline
+// ---------------------------------------------------------------------------
+
+describe('AE telemetry — emits exactly one event per /api/score request', () => {
+  test('curated registry hit → 1 event', async () => {
+    const events: TelemetryEvent[] = [];
+    await handleScore(getScore('ripgrep'), makeEnv({ telemetryEvents: events }));
+    expect(events).toHaveLength(1);
+  });
+
+  test('GET miss (chain_no_resolve) → 1 event with blob3=chain_no_resolve, status=404', async () => {
+    const events: TelemetryEvent[] = [];
+    const res = await handleScore(
+      getScore('https://github.com/owner/not-in-registry'),
+      makeEnv({ telemetryEvents: events }),
+    );
+    expect(events).toHaveLength(1);
+    const evt = lastEvent(events);
+    expect(evt.blobs?.[SLOT.BLOB_ERROR_CODE]).toBe('chain_no_resolve');
+    expect(evt.blobs?.[SLOT.BLOB_FRESHNESS]).toBe(null);
+    expect(evt.doubles?.[SLOT.DOUBLE_STATUS]).toBe(res.status);
+  });
+
+  test('POST validation reject (invalid github host) → 1 event with input_kind=invalid', async () => {
+    const events: TelemetryEvent[] = [];
+    await handleScore(postScore('https://gitlab.com/owner/repo'), makeEnv({ telemetryEvents: events }));
+    const evt = lastEvent(events);
+    expect(events).toHaveLength(1);
+    expect(evt.blobs?.[SLOT.BLOB_INPUT_KIND]).toBe('invalid');
+    expect(evt.blobs?.[SLOT.BLOB_ERROR_CODE]).toBe('non_github_host');
+  });
+
+  test('POST turnstile_failed → 1 event with blob3=turnstile_failed', async () => {
+    const events: TelemetryEvent[] = [];
+    await handleScore(
+      postScore('cargo install foo-cli'),
+      makeEnv({ telemetryEvents: events, turnstileResponse: { success: false } }),
+    );
+    const evt = lastEvent(events);
+    expect(events).toHaveLength(1);
+    expect(evt.blobs?.[SLOT.BLOB_ERROR_CODE]).toBe('turnstile_failed');
+  });
+
+  test('POST rate_limited (session limiter) → 1 event with blob3=rate_limited, status=429', async () => {
+    const events: TelemetryEvent[] = [];
+    await handleScore(postScore('cargo install foo-cli'), makeEnv({ telemetryEvents: events, rateLimit: false }));
+    const evt = lastEvent(events);
+    expect(events).toHaveLength(1);
+    expect(evt.blobs?.[SLOT.BLOB_ERROR_CODE]).toBe('rate_limited');
+    expect(evt.doubles?.[SLOT.DOUBLE_STATUS]).toBe(429);
+  });
+
+  test('POST live success → 1 event with blob4=live, install_ms + anc_check_ms populated', async () => {
+    const events: TelemetryEvent[] = [];
+    await handleScore(
+      postScore('cargo install foo-cli'),
+      makeEnv({
+        telemetryEvents: events,
+        doResponse: {
+          scorecard: { tool: { name: 'foo-cli', version: '1.0.0' } },
+          anc_version: '0.3.1',
+          install_ms: 1234,
+          anc_check_ms: 567,
+        },
+      }),
+    );
+    const evt = lastEvent(events);
+    expect(events).toHaveLength(1);
+    expect(evt.blobs?.[SLOT.BLOB_FRESHNESS]).toBe('live');
+    expect(evt.blobs?.[SLOT.BLOB_PM]).toBe('cargo-binstall');
+    expect(evt.doubles?.[SLOT.DOUBLE_INSTALL_MS]).toBe(1234);
+    expect(evt.doubles?.[SLOT.DOUBLE_ANC_CHECK_MS]).toBe(567);
+    expect(evt.doubles?.[SLOT.DOUBLE_STATUS]).toBe(200);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Kill-switch + telemetry interaction — operators MUST see kill-switched
+// traffic in AE; suppressing the event would hide a denial-of-service
+// signal.
+// ---------------------------------------------------------------------------
+
+describe('AE telemetry — kill switch fired still emits', () => {
+  test('scoring_disabled bounce → 1 event with blob3=scoring_disabled, status=503', async () => {
+    const events: TelemetryEvent[] = [];
+    const res = await handleScore(
+      postScore('cargo install foo-cli'),
+      makeEnv({ telemetryEvents: events, kvDisabled: true }),
+    );
+    expect(events).toHaveLength(1);
+    const evt = lastEvent(events);
+    expect(evt.blobs?.[SLOT.BLOB_ERROR_CODE]).toBe('scoring_disabled');
+    expect(evt.doubles?.[SLOT.DOUBLE_STATUS]).toBe(503);
+    expect(res.status).toBe(503);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Graceful degradation — AE outage MUST NOT break /api/score
+// ---------------------------------------------------------------------------
+
+describe('AE telemetry — write failure swallowed', () => {
+  test('writeDataPoint throws on success path → handler still returns 200', async () => {
+    const res = await handleScore(getScore('ripgrep'), makeEnv({ telemetryThrows: true }));
+    expect(res.status).toBe(200);
+    const body = (await res.json()) as { spec_version: string; checker_url: string };
+    expect(body.spec_version).toBeDefined();
+    expect(body.checker_url).toBeDefined();
+  });
+
+  test('writeDataPoint throws on error path → handler still returns the error envelope', async () => {
+    const res = await handleScore(
+      postScore('cargo install foo-cli'),
+      makeEnv({ telemetryThrows: true, rateLimit: false }),
+    );
+    expect(res.status).toBe(429);
+    const body = (await res.json()) as { error: { code: string } };
+    expect(body.error.code).toBe('rate_limited');
+  });
+});
diff --git a/tests/score-validate.test.ts b/tests/score-validate.test.ts
index 624c8e7..242475a 100644
--- a/tests/score-validate.test.ts
+++ b/tests/score-validate.test.ts
@@ -1,5 +1,5 @@
 import { describe, expect, test } from 'bun:test';
-import { validateInput } from '../src/worker/score/validate';
+import { validateInput, validBranchName } from '../src/worker/score/validate';
 
 const REGISTRY = {
   by_slug: {
@@ -27,6 +27,15 @@ describe('validateInput — slug', () => {
   test('whitespace-trimmed before slug check', () => {
     expect(validateInput('  bat  ', REGISTRY).kind).toBe('slug');
   });
+
+  test('leading + trailing whitespace on a curated slug routes to slug, NOT unrecognized_input', () => {
+    // Front-end trims on submit (live-score.ts), but a user could POST
+    // ` ripgrep ` directly to /api/score via curl. The validator MUST
+    // trim before the slug-and-registry check; otherwise `" ripgrep "`
+    // would fail SLUG_RE and bounce as unrecognized_input.
+    expect(validateInput(' ripgrep ', REGISTRY)).toEqual({ kind: 'slug', slug: 'ripgrep' });
+    expect(validateInput('\tripgrep\n', REGISTRY)).toEqual({ kind: 'slug', slug: 'ripgrep' });
+  });
 });
 
 describe('validateInput — install command', () => {
@@ -49,6 +58,37 @@ describe('validateInput — install command', () => {
       error: 'unparseable_install_command',
     });
   });
+
+  test('looks-like-install-command for unsupported PM → unparseable_install_command (NOT unrecognized_input)', () => {
+    // Without the unsupported-PM branch, `apt-get install foo` would
+    // fall through to `unrecognized_input` and the homepage form would
+    // render the generic "not a recognized tool" copy. The dedicated
+    // bucket lets the client surface "PM isn't supported, try cargo /
+    // brew / npm / pip / bun / uv / go" instead.
+    const unsupportedCases = [
+      'apt-get install foo',
+      'apt install foo',
+      'dnf install foo',
+      'yum install foo',
+      'zypper install foo',
+      'pacman -S foo',
+      'snap install foo',
+      'flatpak install foo',
+      'port install foo',
+      'choco install foo',
+      'scoop install foo',
+      'winget install foo',
+      'gem install foo',
+      'composer require foo',
+      'emerge foo',
+    ];
+    for (const cmd of unsupportedCases) {
+      expect(validateInput(cmd, REGISTRY)).toEqual({
+        kind: 'unknown',
+        error: 'unparseable_install_command',
+      });
+    }
+  });
 });
 
 describe('validateInput — github URL', () => {
@@ -76,28 +116,94 @@ describe('validateInput — github URL', () => {
     });
   });
 
-  test('branch path /tree/main is rejected', () => {
+  test('release-asset URL is rejected as invalid_url_path', () => {
+    expect(validateInput('https://github.com/foo/bar/releases/download/v1/foo-x86_64.tar.gz', REGISTRY).kind).toBe(
+      'unknown',
+    );
+  });
+});
+
+describe('validateInput — branch URL', () => {
+  test('/tree/<branch> accepts with branch captured', () => {
     expect(validateInput('https://github.com/foo/bar/tree/main', REGISTRY)).toEqual({
+      kind: 'github-url',
+      owner: 'foo',
+      repo: 'bar',
+      branch: 'main',
+    });
+  });
+
+  test('/tree/<branch> with subpath: branch captures the FULL tail (semantic match with GitHub)', () => {
+    // GitHub's own URL routing can't disambiguate `feature/new/<no-subpath>`
+    // from `feature/new/<subpath>` without a server round-trip — the URL
+    // shape is the same. We accept the full tail as the branch and let
+    // the DO's git clone bounce if the branch doesn't exist. Matches
+    // GitHub's own semantics: paste-and-share works for the user.
+    const r = validateInput('https://github.com/foo/bar/tree/main/docs/file.md', REGISTRY);
+    expect(r.kind).toBe('github-url');
+    if (r.kind === 'github-url') {
+      expect(r.owner).toBe('foo');
+      expect(r.repo).toBe('bar');
+      // Tail captured as branch; the DO's git clone will validate
+      // against the actual ref at clone time.
+      expect(r.branch).toBe('main/docs/file.md');
+    }
+  });
+
+  test('branch name with slash (feature/new-thing) accepts', () => {
+    const r = validateInput('https://github.com/foo/bar/tree/feature/new-thing', REGISTRY);
+    expect(r.kind).toBe('github-url');
+    if (r.kind === 'github-url') expect(r.branch).toBe('feature/new-thing');
+  });
+
+  test('empty branch (/tree/) rejected as invalid_url_path', () => {
+    expect(validateInput('https://github.com/foo/bar/tree/', REGISTRY)).toEqual({
       kind: 'unknown',
       error: 'invalid_url_path',
     });
   });
 
-  test('release-asset URL is rejected as invalid_url_path', () => {
-    expect(validateInput('https://github.com/foo/bar/releases/download/v1/foo-x86_64.tar.gz', REGISTRY).kind).toBe(
-      'unknown',
-    );
+  test('trailing slash on branch trims (/tree/main/)', () => {
+    const r = validateInput('https://github.com/foo/bar/tree/main/', REGISTRY);
+    expect(r.kind).toBe('github-url');
+    if (r.kind === 'github-url') expect(r.branch).toBe('main');
+  });
+
+  test('default-branch path (no /tree/<branch>) returns github-url WITHOUT branch field', () => {
+    const r = validateInput('https://github.com/foo/bar', REGISTRY);
+    expect(r.kind).toBe('github-url');
+    if (r.kind === 'github-url') expect(r.branch).toBeUndefined();
   });
 });
 
-describe('validateInput — URL error paths', () => {
-  test('non-https URL rejected', () => {
-    expect(validateInput('http://github.com/foo/bar', REGISTRY)).toEqual({
-      kind: 'unknown',
-      error: 'non_https_url',
+describe('validateInput — owner/repo shorthand', () => {
+  test('basic shorthand: `tobi/qmd` → github-url', () => {
+    expect(validateInput('tobi/qmd', REGISTRY)).toEqual({
+      kind: 'github-url',
+      owner: 'tobi',
+      repo: 'qmd',
+    });
+  });
+
+  test('curated owner/repo via shorthand: registry cross-check is the lookupRegistry layer, not validator', () => {
+    // The validator routes BurntSushi/ripgrep to github-url. Whether it
+    // resolves to a registry hit is the registry-lookup layer's job
+    // (lookupRegistry consults by_owner_repo case-insensitively).
+    expect(validateInput('BurntSushi/ripgrep', REGISTRY)).toEqual({
+      kind: 'github-url',
+      owner: 'BurntSushi',
+      repo: 'ripgrep',
     });
   });
 
+  test('repo names with dots / underscores / hyphens accept (GitHub-legal)', () => {
+    expect(validateInput('foo/my.repo', REGISTRY).kind).toBe('github-url');
+    expect(validateInput('foo/my_repo', REGISTRY).kind).toBe('github-url');
+    expect(validateInput('foo/my-repo', REGISTRY).kind).toBe('github-url');
+  });
+});
+
+describe('validateInput — URL error paths', () => {
   test('non-github host rejected', () => {
     expect(validateInput('https://gitlab.com/foo/bar', REGISTRY)).toEqual({
       kind: 'unknown',
@@ -134,3 +240,289 @@ describe('validateInput — empty / unknown', () => {
     });
   });
 });
+
+// ============================================================================
+// RED TEAM tests (input-handling expansion)
+//
+// Each new feature carries its own attack surface. Pin the negative paths
+// so a future regex relaxation doesn't silently widen the gate.
+// ============================================================================
+
+describe('RED TEAM — http:// silent upgrade (feature 1)', () => {
+  test('http://github.com/cli/cli → upgraded to https, parsed as github-url (curated cli/cli)', () => {
+    // The whole point of the silent upgrade: a user pasting the http://
+    // form of a curated tool URL gets the same answer as the https://
+    // form. The protocol was the only thing wrong.
+    expect(validateInput('http://github.com/cli/cli', REGISTRY)).toEqual({
+      kind: 'github-url',
+      owner: 'cli',
+      repo: 'cli',
+    });
+  });
+
+  test('http://github.com.evil.com/x/y → upgrade to https, still non_github_host (exact-match hostname)', () => {
+    // Substring attack: the attacker's hostname `github.com.evil.com`
+    // contains `github.com` as a substring but is NOT equal to it. The
+    // URL parser's hostname field is the full `github.com.evil.com`;
+    // literal comparison against `github.com` rejects it. The http://
+    // upgrade does not weaken this gate — the host check runs AFTER
+    // the upgrade on the parsed URL.
+    expect(validateInput('http://github.com.evil.com/foo/bar', REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'non_github_host',
+    });
+  });
+
+  test('http://gitlab.com/foo/bar → upgraded, still non_github_host (gitlab is not github)', () => {
+    // Protocol upgrade is silent; host check is not. The upgrade only
+    // changes what the user MEANT — it does not move the trust boundary.
+    expect(validateInput('http://gitlab.com/foo/bar', REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'non_github_host',
+    });
+  });
+
+  test('javascript://github.com/x/y → NOT silently upgraded (protocol confusion attack)', () => {
+    // The upgrade regex matches `^http://` ONLY. `javascript:` is a
+    // different scheme entirely and falls through to URL parsing.
+    // URL.parseable but protocol is `javascript:`; non_https_url
+    // rejects it.
+    const r = validateInput('javascript://github.com/x/y', REGISTRY);
+    expect(r.kind).toBe('unknown');
+    if (r.kind === 'unknown') {
+      // Either non_https_url (parser accepts javascript: as a scheme)
+      // OR invalid_url (parser refuses). Both are correct rejects.
+      expect(['non_https_url', 'invalid_url']).toContain(r.error);
+    }
+  });
+
+  test('htp://github.com/foo → genuinely malformed; falls through to invalid_url or unrecognized_input', () => {
+    // Typo in protocol — does not match `^http://`. Falls to the URL
+    // parser, which may accept `htp:` as a custom scheme. Whichever
+    // rejection branch fires, it MUST NOT silently parse as a github-url.
+    const r = validateInput('htp://github.com/foo/bar', REGISTRY);
+    expect(r.kind).toBe('unknown');
+  });
+
+  test('http://192.168.1.1/x/y → upgraded, IP host rejected as non_github_host', () => {
+    // Numeric host attempt — URL parser puts the IP in the hostname
+    // field; literal comparison against `github.com` rejects.
+    expect(validateInput('http://192.168.1.1/foo/bar', REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'non_github_host',
+    });
+  });
+
+  test('http:// empty (nothing after prefix) → invalid_url', () => {
+    expect(validateInput('http://', REGISTRY).kind).toBe('unknown');
+  });
+
+  test('HTTP://GitHub.com/foo/bar (uppercased protocol) → upgrade is case-insensitive', () => {
+    // Regex uses /i flag. Without it, an uppercase paste would bounce
+    // as a non-protocol input and the upgrade wouldn't apply.
+    expect(validateInput('HTTP://GitHub.com/foo/bar', REGISTRY)).toEqual({
+      kind: 'github-url',
+      owner: 'foo',
+      repo: 'bar',
+    });
+  });
+});
+
+describe('RED TEAM — owner/repo shorthand (feature 2)', () => {
+  test('path traversal: `../etc/passwd` → unrecognized_input', () => {
+    // Shorthand regex requires owner+repo to match strict character
+    // classes that exclude `..`. The shorthand path doesn't match the
+    // pattern so it falls through.
+    expect(validateInput('../etc/passwd', REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'unrecognized_input',
+    });
+  });
+
+  test('three segments: `foo/bar/baz` → falls through (not the shorthand shape)', () => {
+    // The shorthand is EXACTLY two segments. Three segments don't
+    // match SHORTHAND_RE.
+    expect(validateInput('foo/bar/baz', REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'unrecognized_input',
+    });
+  });
+
+  test('empty owner: `/qmd` → unrecognized_input', () => {
+    expect(validateInput('/qmd', REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'unrecognized_input',
+    });
+  });
+
+  test('empty repo: `tobi/` → unrecognized_input', () => {
+    expect(validateInput('tobi/', REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'unrecognized_input',
+    });
+  });
+
+  test('leading hyphen in owner: `-bad/repo` → unrecognized_input (GitHub rejects too)', () => {
+    expect(validateInput('-bad/repo', REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'unrecognized_input',
+    });
+  });
+
+  test('space in segment: `tobi name/qmd` → unrecognized_input', () => {
+    expect(validateInput('tobi name/qmd', REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'unrecognized_input',
+    });
+  });
+
+  test('owner over 39 chars (GitHub limit) → unrecognized_input', () => {
+    const longOwner = 'a'.repeat(40);
+    expect(validateInput(`${longOwner}/repo`, REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'unrecognized_input',
+    });
+  });
+
+  test('repo over 100 chars → unrecognized_input', () => {
+    const longRepo = 'a'.repeat(101);
+    expect(validateInput(`foo/${longRepo}`, REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'unrecognized_input',
+    });
+  });
+
+  test('null byte in shorthand: `tobi\\0/qmd` → unrecognized_input', () => {
+    // Defense in depth: the segment splitter sees the null byte as a
+    // non-printable character that falls outside the strict regex
+    // character classes.
+    expect(validateInput('tobi /qmd', REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'unrecognized_input',
+    });
+  });
+
+  test('shell metacharacters in segment: `tobi;rm/qmd` → unrecognized_input', () => {
+    expect(validateInput('tobi;rm/qmd', REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'unrecognized_input',
+    });
+  });
+
+  test('owner with leading-trailing whitespace inside the segment → unrecognized_input', () => {
+    // Outer trim happens in validateInput; INNER whitespace can't be
+    // trimmed because it would change the user's intent. Strict regex
+    // rejects.
+    expect(validateInput('to bi/qmd', REGISTRY)).toEqual({
+      kind: 'unknown',
+      error: 'unrecognized_input',
+    });
+  });
+});
+
+describe('RED TEAM — branch URL (feature 3)', () => {
+  test('path-traversal in URL-encoded form: `/tree/..%2Fevil` → URL parser decodes; `..` reject fires', () => {
+    // URL parser decodes %2F → /. The peeled branch is then `../evil`.
+    // The explicit `..` reject in validBranchName fires.
+    const r = validateInput('https://github.com/foo/bar/tree/..%2Fevil', REGISTRY);
+    expect(r.kind).toBe('unknown');
+    if (r.kind === 'unknown') expect(r.error).toBe('invalid_url_path');
+  });
+
+  test('shell metacharacters in branch: `; rm -rf /` → rejected', () => {
+    const attempts = [
+      'https://github.com/foo/bar/tree/;rm -rf /',
+      'https://github.com/foo/bar/tree/$(whoami)',
+      'https://github.com/foo/bar/tree/`whoami`',
+      'https://github.com/foo/bar/tree/foo&&bar',
+      'https://github.com/foo/bar/tree/foo|bar',
+      'https://github.com/foo/bar/tree/foo>bar',
+      'https://github.com/foo/bar/tree/foo<bar',
+      'https://github.com/foo/bar/tree/"quoted"',
+      "https://github.com/foo/bar/tree/'quoted'",
+    ];
+    for (const url of attempts) {
+      const r = validateInput(url, REGISTRY);
+      expect(r.kind, `expected reject for: ${url}`).toBe('unknown');
+    }
+  });
+
+  test('over-long branch (>250 chars) → rejected', () => {
+    const longBranch = 'a'.repeat(251);
+    const r = validateInput(`https://github.com/foo/bar/tree/${longBranch}`, REGISTRY);
+    expect(r.kind).toBe('unknown');
+    if (r.kind === 'unknown') expect(r.error).toBe('invalid_url_path');
+  });
+
+  test('branch with leading dot: `.evil` → rejected (matches git refname rule and dotfile concerns)', () => {
+    const r = validateInput('https://github.com/foo/bar/tree/.evil', REGISTRY);
+    expect(r.kind).toBe('unknown');
+  });
+
+  test('branch with trailing dot: `evil.` → rejected', () => {
+    const r = validateInput('https://github.com/foo/bar/tree/evil.', REGISTRY);
+    expect(r.kind).toBe('unknown');
+  });
+
+  test('valid 250-char branch boundary → accepts', () => {
+    // Boundary: exactly 250 chars passes.
+    const branch = 'a'.repeat(250);
+    const r = validateInput(`https://github.com/foo/bar/tree/${branch}`, REGISTRY);
+    expect(r.kind).toBe('github-url');
+    if (r.kind === 'github-url') expect(r.branch).toBe(branch);
+  });
+
+  test('valid branch with dots, hyphens, underscores: `release/v1.2.3-rc_1` → accepts', () => {
+    const r = validateInput('https://github.com/foo/bar/tree/release/v1.2.3-rc_1', REGISTRY);
+    expect(r.kind).toBe('github-url');
+    if (r.kind === 'github-url') expect(r.branch).toBe('release/v1.2.3-rc_1');
+  });
+});
+
+describe('validBranchName — direct unit tests (defense-in-depth helper)', () => {
+  test('alphanumeric accepts', () => {
+    expect(validBranchName('main')).toBe(true);
+    expect(validBranchName('v1')).toBe(true);
+    expect(validBranchName('feature/new-thing')).toBe(true);
+    expect(validBranchName('release/v1.2.3')).toBe(true);
+  });
+
+  test('rejects `..` anywhere', () => {
+    expect(validBranchName('..')).toBe(false);
+    expect(validBranchName('foo..bar')).toBe(false);
+    expect(validBranchName('../etc')).toBe(false);
+    expect(validBranchName('foo/..')).toBe(false);
+  });
+
+  test('rejects leading or trailing slash', () => {
+    expect(validBranchName('/main')).toBe(false);
+    expect(validBranchName('main/')).toBe(false);
+  });
+
+  test('rejects leading or trailing dot', () => {
+    expect(validBranchName('.main')).toBe(false);
+    expect(validBranchName('main.')).toBe(false);
+  });
+
+  test('rejects shell metacharacters', () => {
+    expect(validBranchName('foo;bar')).toBe(false);
+    expect(validBranchName('foo$bar')).toBe(false);
+    expect(validBranchName('foo`bar')).toBe(false);
+    expect(validBranchName('foo(bar)')).toBe(false);
+    expect(validBranchName('foo&bar')).toBe(false);
+    expect(validBranchName('foo|bar')).toBe(false);
+    expect(validBranchName('foo>bar')).toBe(false);
+    expect(validBranchName('foo bar')).toBe(false);
+    expect(validBranchName('foo"bar')).toBe(false);
+    expect(validBranchName("foo'bar")).toBe(false);
+  });
+
+  test('rejects empty', () => {
+    expect(validBranchName('')).toBe(false);
+  });
+
+  test('rejects over 250 chars', () => {
+    expect(validBranchName('a'.repeat(251))).toBe(false);
+    expect(validBranchName('a'.repeat(250))).toBe(true);
+  });
+});
diff --git a/tests/scorecard-format-shared.test.ts b/tests/scorecard-format-shared.test.ts
new file mode 100644
index 0000000..37478d2
--- /dev/null
+++ b/tests/scorecard-format-shared.test.ts
@@ -0,0 +1,151 @@
+// Unit tests for src/shared/scorecard-format.mjs — the Worker-safe primitives
+// shared by build-time markdown rendering (scorecards-render.mjs) and the
+// Worker's /score/live/<binary>.md route (summary-render.ts).
+//
+// The row formatter is the load-bearing primitive: every check-table row in
+// both `dist/score/<tool>.md` and `/score/live/<binary>.md` flows through
+// it. Pipe-escape behavior and principle-link shape live here.
+
+import { describe, expect, test } from 'bun:test';
+import {
+  BONUS_GROUPS,
+  escHtml,
+  extractTopIssues,
+  formatCheckRowMarkdown,
+  formatCheckTableMarkdownLines,
+  groupToPrincipleNum,
+  PRINCIPLE_GROUPS,
+  PRINCIPLE_NAMES,
+} from '../src/shared/scorecard-format.mjs';
+
+describe('escHtml', () => {
+  test('escapes & < > " \'', () => {
+    expect(escHtml(`<img src="x" onerror='alert(1)'>&`)).toBe(
+      '&lt;img src=&quot;x&quot; onerror=&#39;alert(1)&#39;&gt;&amp;',
+    );
+  });
+  test('passes through plain text', () => {
+    expect(escHtml('ripgrep — fast search')).toBe('ripgrep — fast search');
+  });
+});
+
+describe('PRINCIPLE_GROUPS + PRINCIPLE_NAMES', () => {
+  test('covers P1..P7', () => {
+    expect(PRINCIPLE_GROUPS).toEqual(['P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7']);
+    for (const g of PRINCIPLE_GROUPS) {
+      expect(PRINCIPLE_NAMES[g]).toBeTruthy();
+    }
+  });
+  test('BONUS_GROUPS is closed set', () => {
+    expect(BONUS_GROUPS).toEqual(['CodeQuality', 'ProjectStructure']);
+  });
+});
+
+describe('groupToPrincipleNum', () => {
+  test('P1..P7 → 1..7', () => {
+    expect(groupToPrincipleNum('P1')).toBe(1);
+    expect(groupToPrincipleNum('P7')).toBe(7);
+  });
+  test('bonus groups → null', () => {
+    expect(groupToPrincipleNum('CodeQuality')).toBeNull();
+    expect(groupToPrincipleNum('ProjectStructure')).toBeNull();
+  });
+  test('garbage → null', () => {
+    expect(groupToPrincipleNum('P')).toBeNull();
+    expect(groupToPrincipleNum('p3')).toBeNull(); // lowercase rejected
+    expect(groupToPrincipleNum('Pasta')).toBeNull();
+  });
+});
+
+describe('extractTopIssues', () => {
+  const SC = {
+    results: [
+      { status: 'pass', label: 'ok', group: 'P1', evidence: null },
+      { status: 'warn', label: 'iffy', group: 'P2', evidence: 'something' },
+      { status: 'fail', label: 'broken', group: 'P3', evidence: 'bad' },
+      { status: 'fail', label: 'broken2', group: 'P4', evidence: 'bad2' },
+    ],
+  };
+  test('sorts FAIL before WARN, drops pass', () => {
+    const top = extractTopIssues(SC, 4);
+    expect(top.map((i: { label: string }) => i.label)).toEqual(['broken', 'broken2', 'iffy']);
+  });
+  test('respects limit', () => {
+    const top = extractTopIssues(SC, 2);
+    expect(top.map((i: { label: string }) => i.label)).toEqual(['broken', 'broken2']);
+  });
+  test('handles null/undefined safely', () => {
+    expect(extractTopIssues(null)).toEqual([]);
+    expect(extractTopIssues(undefined)).toEqual([]);
+    expect(extractTopIssues({})).toEqual([]);
+    expect(extractTopIssues({ results: undefined })).toEqual([]);
+  });
+});
+
+describe('formatCheckRowMarkdown', () => {
+  test('emits canonical row shape with site-relative link', () => {
+    const row = formatCheckRowMarkdown({
+      status: 'fail',
+      label: 'exits 0 on missing flag',
+      group: 'P4',
+      evidence: 'expected non-zero exit, got 0',
+    });
+    expect(row).toBe('| FAIL | exits 0 on missing flag | [P4](/p4) | expected non-zero exit, got 0 |');
+  });
+  test('absolute baseUrl produces absolute principle link', () => {
+    const row = formatCheckRowMarkdown(
+      { status: 'warn', label: 'noisy', group: 'P2', evidence: 'extra logging' },
+      { baseUrl: 'https://anc.dev' },
+    );
+    expect(row).toBe('| WARN | noisy | [P2](https://anc.dev/p2) | extra logging |');
+  });
+  test('bonus groups stay plain text (no link)', () => {
+    const row = formatCheckRowMarkdown({
+      status: 'fail',
+      label: 'low test coverage',
+      group: 'CodeQuality',
+      evidence: '40%',
+    });
+    expect(row).toBe('| FAIL | low test coverage | CodeQuality | 40% |');
+  });
+  test('escapes pipe characters in label + evidence to preserve table shape', () => {
+    const row = formatCheckRowMarkdown({
+      status: 'fail',
+      label: 'pipe | trouble',
+      group: 'P3',
+      evidence: 'cmd | grep foo | head -1',
+    });
+    expect(row).toContain('pipe \\| trouble');
+    expect(row).toContain('cmd \\| grep foo \\| head -1');
+    // The row still has exactly 5 unescaped pipes (the table delimiters).
+    const unescapedPipes = row.match(/(?<!\\)\|/g)?.length ?? 0;
+    expect(unescapedPipes).toBe(5);
+  });
+  test('handles null evidence', () => {
+    const row = formatCheckRowMarkdown({
+      status: 'pass',
+      label: 'ok',
+      group: 'P1',
+      evidence: null,
+    });
+    expect(row).toBe('| PASS | ok | [P1](/p1) |  |');
+  });
+});
+
+describe('formatCheckTableMarkdownLines', () => {
+  test('emits header + delimiter + rows', () => {
+    const lines = formatCheckTableMarkdownLines([
+      { status: 'fail', label: 'a', group: 'P1', evidence: 'x' },
+      { status: 'warn', label: 'b', group: 'P2', evidence: null },
+    ]);
+    expect(lines).toEqual([
+      '| Status | Check | Principle | Evidence |',
+      '|--------|-------|-----------|----------|',
+      '| FAIL | a | [P1](/p1) | x |',
+      '| WARN | b | [P2](/p2) |  |',
+    ]);
+  });
+  test('returns [] for empty input (caller decides fallback copy)', () => {
+    expect(formatCheckTableMarkdownLines([])).toEqual([]);
+  });
+});
diff --git a/tests/spec-version-gen.test.ts b/tests/spec-version-gen.test.ts
new file mode 100644
index 0000000..6c836ac
--- /dev/null
+++ b/tests/spec-version-gen.test.ts
@@ -0,0 +1,43 @@
+// Drift guard for src/worker/spec-version.gen.ts.
+//
+// `src/build/00-spec-version-gen.mjs` regenerates the file from
+// `src/data/spec/VERSION` + `content/principles/VERSION`. The build itself
+// runs the emitter, but the .gen.ts file is committed so the worker bundle
+// can be type-checked without a build step. This test re-runs the emitter
+// in memory and asserts the on-disk file matches — so an out-of-date
+// committed file fails CI rather than silently shipping a stale triad.
+
+import { describe, expect, test } from 'bun:test';
+import { readFile } from 'node:fs/promises';
+import { computeExpectedSpecVersionModule } from '../src/build/00-spec-version-gen.mjs';
+
+describe('spec-version-gen drift guard', () => {
+  test('src/worker/spec-version.gen.ts matches the VERSION files', async () => {
+    const expected = await computeExpectedSpecVersionModule();
+    const actual = await readFile(expected.path, 'utf8');
+    if (actual !== expected.content) {
+      throw new Error(
+        `src/worker/spec-version.gen.ts is out of date relative to VERSION files. ` +
+          `Run \`bun run build\` (or \`bun src/build/00-spec-version-gen.mjs\`) and commit the result. ` +
+          `Expected SPEC_VERSION=${expected.specVersion}, SITE_SPEC_VERSION=${expected.siteSpecVersion}.`,
+      );
+    }
+    expect(actual).toBe(expected.content);
+  });
+
+  test('emitter rejects empty VERSION input', async () => {
+    // Exercise the readVersion guard via a fixture-free shape: we re-import
+    // the module and verify the public renderer rejects invalid input shape.
+    const { renderSpecVersionModule } = await import('../src/build/00-spec-version-gen.mjs');
+    const content = renderSpecVersionModule({
+      specVersion: '1.2.3',
+      siteSpecVersion: '1.2.3',
+      checkerUrl: 'https://anc.dev/score',
+    });
+    expect(content).toContain("export const SPEC_VERSION = '1.2.3'");
+    expect(content).toContain("export const SITE_SPEC_VERSION = '1.2.3'");
+    expect(content).toContain("export const CHECKER_URL = 'https://anc.dev/score'");
+    // The marker comment is load-bearing for the "do not hand-edit" signal.
+    expect(content).toContain('GENERATED by src/build/00-spec-version-gen.mjs');
+  });
+});
diff --git a/tests/worker-entry-exports.test.ts b/tests/worker-entry-exports.test.ts
new file mode 100644
index 0000000..3bf241f
--- /dev/null
+++ b/tests/worker-entry-exports.test.ts
@@ -0,0 +1,86 @@
+// Worker entry export contract tests.
+//
+// The CF Sandbox / Containers SDK enforces several runtime contracts on
+// the Worker entry's named exports. The exports are looked up via
+// `ctx.exports.<Name>` at request time; missing or misnamed exports
+// throw with messages like:
+//
+//   "ctx.exports.ContainerProxy is undefined, export ContainerProxy from
+//    the containers package in your worker entrypoint"
+//   "Received a FetchEvent but we lack a handler for FetchEvents. Did you
+//    remember to export a fetch() function?"
+//   "Handler does not export a fetch() function" (Cloudflare error 1101)
+//
+// All three surface only on the first request hitting the affected code
+// path in a deployed Worker. `wrangler deploy --dry-run`, the bun-test
+// `cloudflare:workers` shim, and TypeScript compilation all pass. This
+// is the same class of failure as:
+//
+//   - PR #93 / PR #94 — DO `fetch()` missing on the Sandbox class
+//   - This commit — `ContainerProxy` missing from the Worker entry
+//
+// Each of those incidents cost a deploy + a hotfix. This file guards the
+// floor: assert every export the SDK looks up by name actually exists on
+// the Worker entry module. New SDK contract additions get added here as
+// they're discovered, gated on the property that triggers the contract.
+
+import { describe, expect, test } from 'bun:test';
+import * as workerEntry from '../src/worker/index';
+
+describe('Worker entry — named export contract for CF Sandbox / Containers SDK', () => {
+  test('exports `Sandbox` class for the DurableObject + Container binding lookup', () => {
+    // wrangler.jsonc references `class_name: "Sandbox"` in both the
+    // `containers[]` and `durable_objects.bindings[]` blocks. Wrangler
+    // resolves that name via the Worker entry's exports at deploy time.
+    // Missing the export prevents wrangler deploy from completing.
+    expect(workerEntry.Sandbox).toBeDefined();
+    expect(typeof workerEntry.Sandbox).toBe('function');
+  });
+
+  test('exports `ContainerProxy` whenever any Sandbox subclass declares outbound handlers', () => {
+    // The CF Containers SDK looks up `ctx.exports.ContainerProxy` at
+    // outbound-handler dispatch time. Required whenever the Worker
+    // declares `outboundHandlers`, `outboundByHost`, or `outbound` on
+    // a Sandbox/Container subclass — i.e. any code path that calls
+    // `setOutboundHandler` / `setOutboundByHost` at runtime. Setting
+    // any of these without exporting ContainerProxy throws on the
+    // first DO fetch in production.
+    //
+    // The contract gate is two-pronged: if any Sandbox subclass on
+    // this entry declares any outbound-related static property, then
+    // ContainerProxy MUST be exported. The test fails if a future
+    // refactor introduces another Sandbox subclass with outbound
+    // handlers but forgets the ContainerProxy re-export.
+    type SandboxClass = {
+      outboundHandlers?: unknown;
+      outboundByHost?: unknown;
+      outbound?: unknown;
+    };
+    const sandboxClass = workerEntry.Sandbox as unknown as SandboxClass;
+    const declaresOutbound =
+      sandboxClass.outboundHandlers !== undefined ||
+      sandboxClass.outboundByHost !== undefined ||
+      sandboxClass.outbound !== undefined;
+
+    if (declaresOutbound) {
+      expect(
+        (workerEntry as Record<string, unknown>).ContainerProxy,
+        'Sandbox declares outbound handlers; ContainerProxy MUST be re-exported from src/worker/index.ts',
+      ).toBeDefined();
+      expect(typeof (workerEntry as Record<string, unknown>).ContainerProxy).toBe('function');
+    }
+  });
+
+  test('Sandbox class exposes the entry methods the binding contract requires', () => {
+    // Defends against the PR #93 / PR #94 class: the DO is invoked via
+    // `stub.fetch(...)` from the Worker handler, so the Sandbox class
+    // MUST export a `fetch()` method. Missing it produces Cloudflare
+    // error 1101 ("Handler does not export a fetch() function") on the
+    // first request. The score-handler.test.ts mock catches this at
+    // type level via `Sandbox['fetch']`; this assertion catches it
+    // structurally so a refactor that loses the prototype binding
+    // (e.g., switching from class syntax to a factory) still fails.
+    const proto = (workerEntry.Sandbox as unknown as { prototype: Record<string, unknown> }).prototype;
+    expect(typeof proto.fetch).toBe('function');
+  });
+});
diff --git a/tests/worker-live-score-routing.test.ts b/tests/worker-live-score-routing.test.ts
new file mode 100644
index 0000000..baf2b93
--- /dev/null
+++ b/tests/worker-live-score-routing.test.ts
@@ -0,0 +1,170 @@
+// Worker entry routing for /score/live/* paths.
+//
+// `/score/live/<binary>` is the canonical no-extension form. `.md` is
+// the markdown twin. `.html` redirects to the canonical form (mirrors
+// the CF Static Assets html_handling=auto-trailing-slash behavior for
+// the curated /score/<tool> static pages).
+//
+// Also verifies the homepage's {{TURNSTILE_SITEKEY}} placeholder is
+// substituted at request time so production cuts ship empty (fail-loud)
+// while staging gets the always-passes test sitekey.
+
+import { beforeEach, describe, expect, test } from 'bun:test';
+import worker, { type Env } from '../src/worker/index';
+import { _resetShellTemplateCache } from '../src/worker/score/summary-render';
+
+const SHELL_TEMPLATE = `<!doctype html>
+<html><head><title>{{TITLE}}</title></head>
+<body>{{BODY}}</body></html>`;
+
+const HOMEPAGE_HTML = `<!doctype html>
+<html><head>
+<title>anc.dev</title>
+<meta name="turnstile-sitekey" content="{{TURNSTILE_SITEKEY}}" />
+</head><body><form data-live-score-form></form></body></html>`;
+
+function makeEnv(overrides: Partial<Env> = {}): Env {
+  return {
+    ASSETS: {
+      async fetch(req: Request | string) {
+        const url = typeof req === 'string' ? req : req.url;
+        const path = new URL(url).pathname;
+        if (path === '/' || path === '/index.html') {
+          return new Response(HOMEPAGE_HTML, {
+            status: 200,
+            headers: { 'content-type': 'text/html; charset=utf-8' },
+          });
+        }
+        if (path === '/index.md') {
+          return new Response('# anc.dev\n\nThe agent-native CLI standard.\n', {
+            status: 200,
+            headers: { 'content-type': 'text/markdown; charset=utf-8' },
+          });
+        }
+        if (path === '/_internal/score-live-shell.html') {
+          return new Response(SHELL_TEMPLATE, { status: 200 });
+        }
+        return new Response('not found', { status: 404 });
+      },
+    } as Fetcher,
+    SCORE_KV: {
+      async get() {
+        return null;
+      },
+    } as unknown as KVNamespace,
+    ...overrides,
+  };
+}
+
+beforeEach(() => {
+  _resetShellTemplateCache();
+});
+
+describe('/live-score URL canonicalization', () => {
+  test('/score/live/<binary>.html → 301 redirect to /score/live/<binary>', async () => {
+    const env = makeEnv();
+    const res = await worker.fetch(new Request('https://anc.dev/score/live/ripgrep.html'), env);
+    expect(res.status).toBe(301);
+    expect(res.headers.get('location')).toBe('/score/live/ripgrep');
+  });
+
+  test('/score/live/<binary>.html redirects regardless of cache state', async () => {
+    // Redirect is at the routing layer, so it fires before the R2 lookup
+    // — a missing cache entry doesn't change the redirect behavior.
+    const env = makeEnv();
+    const res = await worker.fetch(new Request('https://anc.dev/score/live/unknown-tool.html'), env);
+    expect(res.status).toBe(301);
+    expect(res.headers.get('location')).toBe('/score/live/unknown-tool');
+  });
+
+  test('/score/live/<bad-slug>.html does NOT redirect — falls to ASSETS 404', async () => {
+    // Path-traversal guards: shape regex rejects uppercase, dots, slashes.
+    const env = makeEnv();
+    for (const path of [
+      '/score/live/RipGrep.html',
+      '/score/live/../etc.html',
+      '/score/live/-bad.html',
+      '/score/live/foo/bar.html',
+    ]) {
+      const res = await worker.fetch(new Request(`https://anc.dev${path}`), env);
+      // Either a 404 from ASSETS or a 301 — the must-NOT is that the
+      // redirect path matches a malformed slug and serves it as canonical.
+      expect(res.headers.get('location')).not.toBe(path.replace('.html', ''));
+    }
+  });
+
+  test('/score/live/<binary>.md → markdown twin (no redirect)', async () => {
+    const env = makeEnv();
+    const res = await worker.fetch(new Request('https://anc.dev/score/live/ripgrep.md'), env);
+    // No cache prefilled → 404, but with markdown content-type (the
+    // /live-score handler is what serves it, NOT a static asset).
+    expect(res.status).toBe(404);
+    expect(res.headers.get('content-type')).toContain('text/markdown');
+  });
+
+  test('/score/live/<binary> (no extension) → handled by handleLiveScorePage', async () => {
+    const env = makeEnv();
+    const res = await worker.fetch(new Request('https://anc.dev/score/live/ripgrep'), env);
+    // No cache prefilled → 404 HTML (the canonical route, not a redirect).
+    expect(res.status).toBe(404);
+    expect(res.headers.get('content-type')).toContain('text/html');
+  });
+});
+
+describe('Homepage TURNSTILE_SITEKEY substitution', () => {
+  test('homepage HTML substitutes {{TURNSTILE_SITEKEY}} from env var', async () => {
+    const env = makeEnv({ TURNSTILE_SITEKEY: '1x00000000000000000000AA' });
+    const res = await worker.fetch(new Request('https://anc.dev/'), env);
+    expect(res.status).toBe(200);
+    const html = await res.text();
+    expect(html).toContain('content="1x00000000000000000000AA"');
+    expect(html).not.toContain('{{TURNSTILE_SITEKEY}}');
+  });
+
+  test('production (no sitekey set) substitutes empty string', async () => {
+    const env = makeEnv(); // TURNSTILE_SITEKEY absent
+    const res = await worker.fetch(new Request('https://anc.dev/'), env);
+    const html = await res.text();
+    // Placeholder must NOT leak through to the response.
+    expect(html).not.toContain('{{TURNSTILE_SITEKEY}}');
+    // Meta tag still present but with empty content (form JS disables itself).
+    expect(html).toContain('content=""');
+  });
+
+  test('homepage Accept: text/markdown bypasses substitution (serves index.md)', async () => {
+    const env = makeEnv({ TURNSTILE_SITEKEY: 'test-key' });
+    const res = await worker.fetch(new Request('https://anc.dev/', { headers: { accept: 'text/markdown' } }), env);
+    expect(res.headers.get('content-type')).toContain('text/markdown');
+    const md = await res.text();
+    // The markdown twin must not carry the meta-tag placeholder OR the
+    // substituted value. Markdown-twin silence is the build-time
+    // invariant; this is the runtime mirror.
+    expect(md).not.toContain('{{TURNSTILE_SITEKEY}}');
+    expect(md).not.toContain('test-key');
+    expect(md).not.toContain('turnstile-sitekey');
+  });
+
+  test('non-homepage HTML pages are NOT touched by the substitution', async () => {
+    const env = makeEnv({ TURNSTILE_SITEKEY: 'should-not-leak' });
+    // A non-homepage asset that doesn't carry the placeholder shouldn't
+    // be rewritten — the substitution path is scoped to / and /index.html.
+    const res = await worker.fetch(new Request('https://anc.dev/check'), env);
+    // ASSETS returns 404 in this stub (no /check.html fixture), so just
+    // confirm the path didn't blow up.
+    expect(res.status).toBeLessThan(500);
+  });
+});
+
+describe('/_internal/* interceptor', () => {
+  test('direct GET /_internal/score-live-shell.html → 404', async () => {
+    const env = makeEnv();
+    const res = await worker.fetch(new Request('https://anc.dev/_internal/score-live-shell.html'), env);
+    expect(res.status).toBe(404);
+  });
+
+  test('arbitrary /_internal/anything → 404', async () => {
+    const env = makeEnv();
+    const res = await worker.fetch(new Request('https://anc.dev/_internal/something-else'), env);
+    expect(res.status).toBe(404);
+  });
+});
diff --git a/tests/worker.test.ts b/tests/worker.test.ts
index 15420c0..6e750bd 100644
--- a/tests/worker.test.ts
+++ b/tests/worker.test.ts
@@ -7,10 +7,11 @@
 // We exercise the handler end-to-end against a stubbed env.ASSETS fetcher —
 // no wrangler dev needed.
 
-import { describe, expect, test } from 'bun:test';
+import { beforeEach, describe, expect, test } from 'bun:test';
 import { detectPreference } from '../src/worker/accept';
 import { applyHeaders, isStagingHost } from '../src/worker/headers';
 import worker from '../src/worker/index';
+import { _resetIndexCache } from '../src/worker/score/handler';
 
 function req(url: string, accept?: string): Request {
   const headers: Record<string, string> = {};
@@ -342,3 +343,66 @@ describe('worker.fetch — CN rewrite + asset lookup', () => {
     expect(res.headers.get('Access-Control-Allow-Origin')).toBe('*');
   });
 });
+
+// ---------------------------------------------------------------------------
+// /api/score routing (plan U5). The handler's own behavior is covered by
+// tests/score-handler.test.ts; these tests confirm:
+//   1. /api/score requests are intercepted BEFORE the asset call (the stub
+//      ASSETS fetcher is never reached for /api/score*).
+//   2. Asset-first invariant for every other path is preserved.
+//   3. q-value content negotiation works on the /api/score* surface.
+//      Plan-required test: `text/markdown;q=0.1, application/json;q=0.9`
+//      must resolve to JSON, not markdown — guards against substring-
+//      match regressions per the `accept-header-q-value` learning.
+// ---------------------------------------------------------------------------
+
+describe('worker.fetch — /api/score routing', () => {
+  // The handler caches the registry + hints indexes at module scope, so
+  // tests that depend on the stubbed env.ASSETS being reached must reset
+  // the cache before each test — otherwise a prior test's data is served
+  // from memory and the stub is never called.
+  beforeEach(() => {
+    _resetIndexCache();
+  });
+
+  test('/api/score response carries the JSON envelope (not asset content)', async () => {
+    // Confirms index.ts routes /api/score to handleScore rather than the
+    // asset path. The handler always returns JSON; the asset path would
+    // return the stubbed asset body. Asserting on the response shape is
+    // both more robust and more meaningful than the previous fragile
+    // assetCalled flag check.
+    const env = makeEnv({
+      '/registry-index.json': '{"by_slug":{},"by_owner_repo":{}}',
+      '/discovery-hints-index.json': '{"by_owner_repo":{}}',
+    });
+    const url = 'https://anc.dev/api/score?input=unknown-tool';
+    const res = await worker.fetch(req(url), env);
+    expect(res.headers.get('Content-Type')).toContain('application/json');
+    const body = (await res.json()) as { error?: unknown; spec_version?: unknown; checker_url?: unknown };
+    expect(body.spec_version).toBeTruthy();
+    expect(body.checker_url).toBeTruthy();
+  });
+
+  test('asset-first invariant: /scorecards/ripgrep still proxies to env.ASSETS', async () => {
+    const env = makeEnv({ '/scorecards/ripgrep': 'scorecard html' });
+    const res = await worker.fetch(req('https://anc.dev/scorecards/ripgrep'), env);
+    expect(res.headers.get('X-Echo-Path')).toBe('/scorecards/ripgrep');
+  });
+
+  test('q-value: Accept: text/markdown;q=0.1, application/json;q=0.9 → JSON content-type', async () => {
+    // Plan-required test (accept-header-q-value learning). Substring
+    // matching would pick markdown because the header *contains*
+    // 'text/markdown'. The accepts package + q-value parsing picks JSON.
+    const env = makeEnv({
+      '/registry-index.json': '{"by_slug":{},"by_owner_repo":{}}',
+      '/discovery-hints-index.json': '{"by_owner_repo":{}}',
+    });
+    const url = new URL('https://anc.dev/api/score');
+    url.searchParams.set('input', 'unknown-tool');
+    const res = await worker.fetch(
+      new Request(url.toString(), { headers: { accept: 'text/markdown;q=0.1, application/json;q=0.9' } }),
+      env,
+    );
+    expect(res.headers.get('Content-Type')).toContain('application/json');
+  });
+});
diff --git a/tests/wrangler-config.test.ts b/tests/wrangler-config.test.ts
new file mode 100644
index 0000000..1a1f926
--- /dev/null
+++ b/tests/wrangler-config.test.ts
@@ -0,0 +1,216 @@
+// Regression guards on wrangler.jsonc shape.
+//
+// Driven by docs/solutions/integration-issues/wrangler-routes-inheritance-
+// staging-custom-domain-drift-2026-05-15.md:
+//
+//   Wrangler's `routes`, `triggers`, `route`, and `assets` are INHERITABLE
+//   keys. If env.<env-name> doesn't override them explicitly, the env block
+//   silently inherits whatever is at top level. For an account-scoped
+//   resource like a Custom Domain (which only one Worker can own), that
+//   silent inheritance moves ownership on every deploy. For two weeks
+//   `wrangler deploy --env staging` was silently re-binding `anc.dev` to
+//   the staging Worker on every dev merge.
+//
+// The fix is `env.staging.routes: []` and `env.staging.triggers:
+// { crons: [] }`. Removing either silently brings the bug back, so this
+// test asserts they are present + correctly shaped, and gates against
+// adding `route` (singular) or `assets` at the top level without a matching
+// staging override.
+
+import { describe, expect, test } from 'bun:test';
+import { readFileSync } from 'node:fs';
+import { join } from 'node:path';
+
+const WRANGLER_PATH = join(import.meta.dir, '..', 'wrangler.jsonc');
+
+function loadWranglerConfig(): Record<string, unknown> {
+  const raw = readFileSync(WRANGLER_PATH, 'utf8');
+  // Strip JSONC comments + trailing commas before parsing.
+  const stripped = raw
+    .replace(/\/\*[\s\S]*?\*\//g, '')
+    .replace(/(^|[^:])\/\/.*$/gm, '$1')
+    .replace(/,(\s*[}\]])/g, '$1');
+  return JSON.parse(stripped) as Record<string, unknown>;
+}
+
+function getStagingEnv(config: Record<string, unknown>): Record<string, unknown> {
+  const env = config.env as Record<string, unknown> | undefined;
+  expect(env).toBeDefined();
+  const staging = env?.staging as Record<string, unknown> | undefined;
+  if (!staging) throw new Error('env.staging missing from wrangler.jsonc');
+  return staging;
+}
+
+describe('wrangler.jsonc — inherited-property overrides (anc.dev routing-drift regression)', () => {
+  const config = loadWranglerConfig();
+  const staging = getStagingEnv(config);
+
+  test('env.staging.routes is explicitly set to an empty array (prevents anc.dev inheritance)', () => {
+    expect(staging.routes).toBeDefined();
+    expect(Array.isArray(staging.routes)).toBe(true);
+    expect((staging.routes as unknown[]).length).toBe(0);
+  });
+
+  test('env.staging.triggers.crons is explicitly set to an empty array (prophylactic against future cron addition)', () => {
+    expect(staging.triggers).toBeDefined();
+    const triggers = staging.triggers as Record<string, unknown>;
+    expect(triggers.crons).toBeDefined();
+    expect(Array.isArray(triggers.crons)).toBe(true);
+    expect((triggers.crons as unknown[]).length).toBe(0);
+  });
+
+  test('top-level routes points exactly at anc.dev as a custom domain (the canary value)', () => {
+    expect(Array.isArray(config.routes)).toBe(true);
+    const routes = config.routes as Array<Record<string, unknown>>;
+    expect(routes.length).toBe(1);
+    expect(routes[0].pattern).toBe('anc.dev');
+    expect(routes[0].custom_domain).toBe(true);
+  });
+
+  test('top-level `route` singular is NOT used (same hazard shape as `routes`; staging would inherit silently)', () => {
+    // The Wrangler config supports both `route` (single) and `routes`
+    // (array). Both are inheritable. If a future PR ever switches to the
+    // singular form without also overriding it under env.staging, the
+    // routing-drift class re-emerges. We commit to the plural form
+    // (already overridden under env.staging) and reject the singular.
+    expect(config.route).toBeUndefined();
+  });
+
+  test('top-level `assets` is set; env.staging must inherit OR override but never disagree silently', () => {
+    // `assets` is inheritable. If env.staging adds its own `assets` block
+    // that points at a DIFFERENT directory or DIFFERENT binding, that's
+    // probably a bug — assets are runtime resources that should match
+    // across envs. If it's identical, we tolerate the redundancy.
+    expect(config.assets).toBeDefined();
+    if (staging.assets) {
+      expect(staging.assets).toEqual(config.assets as Record<string, unknown>);
+    }
+  });
+});
+
+describe('wrangler.jsonc — env.staging mirrors required non-inheritable bindings', () => {
+  // These bindings are NOT inheritable (per spike 01: containers,
+  // durable_objects, migrations, ratelimits, r2_buckets, kv_namespaces).
+  // Every binding the live-scoring handler reads MUST appear under
+  // env.staging or the staging Worker fails at first /api/score request.
+
+  const config = loadWranglerConfig();
+  const staging = getStagingEnv(config);
+
+  test('env.staging.kv_namespaces declares the SCORE_KV binding', () => {
+    expect(staging.kv_namespaces).toBeDefined();
+    const bindings = (staging.kv_namespaces as Array<Record<string, unknown>>).map((b) => b.binding);
+    expect(bindings).toContain('SCORE_KV');
+  });
+
+  test('env.staging.ratelimits declares both SCORE_LIMITER and SCORE_LIMITER_IP', () => {
+    expect(staging.ratelimits).toBeDefined();
+    const names = (staging.ratelimits as Array<Record<string, unknown>>).map((r) => r.name);
+    expect(names).toContain('SCORE_LIMITER');
+    expect(names).toContain('SCORE_LIMITER_IP');
+  });
+
+  test('env.staging.durable_objects declares the SCORE binding', () => {
+    expect(staging.durable_objects).toBeDefined();
+    const bindings = (
+      (staging.durable_objects as Record<string, unknown>).bindings as Array<Record<string, unknown>>
+    ).map((b) => b.name);
+    expect(bindings).toContain('SCORE');
+  });
+
+  test('env.staging.r2_buckets declares the SCORE_CACHE binding', () => {
+    expect(staging.r2_buckets).toBeDefined();
+    const bindings = (staging.r2_buckets as Array<Record<string, unknown>>).map((r) => r.binding);
+    expect(bindings).toContain('SCORE_CACHE');
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Analytics Engine bindings (plan U10)
+// ---------------------------------------------------------------------------
+
+// The SCORE_TELEMETRY binding is non-inheritable per env, so both top-level
+// (prod) and env.staging must declare it. Each env writes to a DISTINCT
+// dataset so staging traffic doesn't pollute prod aggregates — a future
+// refactor that merges both onto one dataset would silently corrupt every
+// canonical query in docs/runbooks/live-scoring-analytics.md. This guard
+// fires loudly if either pin moves.
+
+describe('wrangler.jsonc — analytics_engine_datasets bindings (plan U10)', () => {
+  const config = loadWranglerConfig();
+  const staging = getStagingEnv(config);
+
+  test('top-level declares the SCORE_TELEMETRY binding against anc_live_score_prod', () => {
+    expect(config.analytics_engine_datasets).toBeDefined();
+    const ae = config.analytics_engine_datasets as Array<Record<string, unknown>>;
+    const score = ae.find((b) => b.binding === 'SCORE_TELEMETRY');
+    expect(score).toBeDefined();
+    expect(score?.dataset).toBe('anc_live_score_prod');
+  });
+
+  test('env.staging declares the SCORE_TELEMETRY binding against anc_live_score_staging', () => {
+    expect(staging.analytics_engine_datasets).toBeDefined();
+    const ae = staging.analytics_engine_datasets as Array<Record<string, unknown>>;
+    const score = ae.find((b) => b.binding === 'SCORE_TELEMETRY');
+    expect(score).toBeDefined();
+    expect(score?.dataset).toBe('anc_live_score_staging');
+  });
+
+  test('prod and staging point at DISTINCT datasets (no accidental merge)', () => {
+    const prodAe = config.analytics_engine_datasets as Array<Record<string, unknown>>;
+    const stagingAe = staging.analytics_engine_datasets as Array<Record<string, unknown>>;
+    const prodDataset = prodAe.find((b) => b.binding === 'SCORE_TELEMETRY')?.dataset;
+    const stagingDataset = stagingAe.find((b) => b.binding === 'SCORE_TELEMETRY')?.dataset;
+    expect(prodDataset).toBeDefined();
+    expect(stagingDataset).toBeDefined();
+    expect(prodDataset).not.toBe(stagingDataset);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// R2 score-cache lifecycle documentation drift (plan U7)
+// ---------------------------------------------------------------------------
+
+// The 7-day TTL on the SCORE_CACHE bucket lives as an R2 bucket lifecycle
+// rule, NOT in wrangler.jsonc — R2 lifecycle isn't a wrangler-config
+// surface yet. The setup commands live in RELEASES.md so a fresh bucket
+// recreate doesn't lose the TTL. Drift on that documentation is silent:
+// a future R2 bucket recreate could ship without the lifecycle rule, and
+// the cache would grow forever. This test asserts the literal commands
+// are present so removal forces a deliberate update.
+
+describe('RELEASES.md — R2 score-cache lifecycle setup commands (plan U7)', () => {
+  const releasesPath = join(import.meta.dir, '..', 'RELEASES.md');
+  const releases = readFileSync(releasesPath, 'utf8');
+
+  test('documents the 7-day lifecycle command for the prod bucket', () => {
+    // Positional args: bucket, rule-name, prefix. Flag: --expire-days.
+    // Earlier docs shipped `--prefix scores/ --expiration-days 7`, which
+    // wrangler 4.x rejects (Unknown arguments). The drift-guard pins the
+    // correct shape so the regression class can't re-emerge silently.
+    expect(releases).toMatch(
+      /wrangler r2 bucket lifecycle add anc-score-cache scores-7day-ttl scores\/ --expire-days 7/,
+    );
+  });
+
+  test('documents the 7-day lifecycle command for the staging bucket', () => {
+    expect(releases).toMatch(
+      /wrangler r2 bucket lifecycle add anc-score-cache-staging scores-7day-ttl scores\/ --expire-days 7/,
+    );
+  });
+});
+
+describe('RELEASES-RATIONALE.md — R2 score-cache key shape (plan U7)', () => {
+  // The cache key prefix `scores/{binary}/{anc-version}.json` is the
+  // load-bearing fact behind the lifecycle rule's `scores/` filter. The
+  // rationale + key shape live in RELEASES-RATIONALE.md (RELEASES.md is the
+  // runbook). If the prefix moves, the architecture doc must move with
+  // it — this drift-guard makes the prefix change visible in CI.
+
+  const architecturePath = join(import.meta.dir, '..', 'RELEASES-RATIONALE.md');
+  const architecture = readFileSync(architecturePath, 'utf8');
+
+  test('mentions the canonical cache key prefix so a future audit can grep for it', () => {
+    expect(architecture).toMatch(/scores\/\{binary\}\/\{anc-version\}\.json/);
+  });
+});
diff --git a/wrangler.jsonc b/wrangler.jsonc
index f738484..e1275bc 100644
--- a/wrangler.jsonc
+++ b/wrangler.jsonc
@@ -27,12 +27,12 @@
     "enabled": true,
     "head_sampling_rate": 1.0
   },
-  // Live-scoring path (plan U3-U7) — first-ever stateful bindings on
-  // this Worker: DO + Container + R2 + rate-limit. The migrations entry
-  // below is a one-way gate: `new_sqlite_classes` MUST be used (not the
-  // legacy `new_classes`) so the DO is created with SQLite-backed
-  // storage. Reverting needs a follow-up migration with
-  // `deleted_classes`; documented in RELEASES.md once U9 lands.
+  // Live-scoring path — first-ever stateful bindings on this Worker:
+  // DO + Container + R2 + rate-limit. The migrations entry below is a
+  // one-way gate: `new_sqlite_classes` MUST be used (not the legacy
+  // `new_classes`) so the DO is created with SQLite-backed storage.
+  // Reverting needs a follow-up migration with `deleted_classes`;
+  // documented in RELEASES.md.
   "containers": [
     {
       "class_name": "Sandbox",
@@ -64,11 +64,10 @@
       // keeps pointing at the previous staging-soaked image until the
       // release explicitly promotes.
       //
-      // Account ID in the URI is acceptable per U3-followup spec:
-      // Wrangler resolves it from auth at push time, so the literal
-      // committed here is the auth-time-generated tag rather than a
-      // pre-shared identifier.
-      "image": "registry.cloudflare.com/6c1bafea907fecbd4ad665b8d0a78e53/anc-sandbox:30f61f1",
+      // Account ID in the URI is committed deliberately: Wrangler
+      // resolves it from auth at push time, so the literal here is the
+      // auth-time-generated tag rather than a pre-shared identifier.
+      "image": "registry.cloudflare.com/6c1bafea907fecbd4ad665b8d0a78e53/anc-sandbox:9aed5c3",
       "instance_type": "basic",
       "max_instances": 3
     }
@@ -94,12 +93,59 @@
     }
   ],
   "ratelimits": [
+    // SCORE_LIMITER — keyed on `<session-id>:<sha256(input)>` inside the
+    // handler (see src/worker/score/handler.ts). 10 distinct-tool requests
+    // per session per minute. Same-tool requests within a session do not
+    // burn budget, which is cache-friendly and lets the registry-fast-path
+    // and R2 caches do their job. New sessions require a Turnstile solve.
     {
       "name": "SCORE_LIMITER",
       "namespace_id": "1001",
       "simple": { "limit": 10, "period": 60 }
+    },
+    // SCORE_LIMITER_IP — coarse per-IP fallback that catches clients
+    // swapping the session cookie to dodge SCORE_LIMITER. Per plan
+    // "Cost ceiling and abuse mitigation" step 2: 30 requests / 60 s / IP.
+    // Distinct namespace so the per-session and per-IP windows don't share
+    // counters.
+    {
+      "name": "SCORE_LIMITER_IP",
+      "namespace_id": "1003",
+      "simple": { "limit": 30, "period": 60 }
+    }
+  ],
+  // SCORE_KV — operator-flippable `scoring_disabled` kill switch (plan
+  // "Cost ceiling and abuse mitigation" step 3). Flip via:
+  //   wrangler kv key put --binding=SCORE_KV scoring_disabled true
+  // The Worker reads + caches the flag in-memory for 30 s; propagates to
+  // every isolate within one KV-read TTL.
+  "kv_namespaces": [
+    {
+      "binding": "SCORE_KV",
+      "id": "5a05224968404e5f96471f1bd007d87a"
+    }
+  ],
+  // SCORE_TELEMETRY — Workers Analytics Engine dataset for /api/score
+  // observability. Handler emits one writeDataPoint per request with the
+  // canonical field shape documented in docs/runbooks/live-scoring-analytics.md.
+  // Two distinct dataset names (top-level vs env.staging) keep staging
+  // noise out of prod aggregates. Dataset is created on first write — no
+  // wrangler analytics-engine create step. Field-shape contract is pinned
+  // by tests/score-telemetry.test.ts; reordering blobs/doubles silently
+  // breaks saved AE SQL queries, so the regression guard fires loudly.
+  "analytics_engine_datasets": [
+    {
+      "binding": "SCORE_TELEMETRY",
+      "dataset": "anc_live_score_prod"
     }
   ],
+  // Production Turnstile sitekey deferred until production promotion. The
+  // homepage form template reads TURNSTILE_SITEKEY from this env var to
+  // render the invisible widget. Absent here so a misconfigured prod cut
+  // fails loudly rather than silently shipping a staging-test sitekey to
+  // production users. The TURNSTILE_SECRET (real) lives in wrangler
+  // secrets, not committed.
+  // "vars": { "TURNSTILE_SITEKEY": "..." },
   // Production (top-level): anc.dev custom domain, no .workers.dev URL.
   // Deployed via `wrangler deploy` (no --env flag) on push to main.
   // Cloudflare auto-provisions SSL cert + DNS CNAME for the custom domain.
@@ -174,9 +220,9 @@
           // version histories) and may legitimately diverge.
           //
           // See RELEASES.md § Sandbox image releases for the full flow.
-          "image": "registry.cloudflare.com/6c1bafea907fecbd4ad665b8d0a78e53/anc-sandbox:30f61f1",
-          "instance_type": "basic",
-          "max_instances": 3
+          "image": "registry.cloudflare.com/6c1bafea907fecbd4ad665b8d0a78e53/anc-sandbox:9aed5c3",
+          "instance_type": "standard-2",
+          "max_instances": 10
         }
       ],
       "durable_objects": {
@@ -187,10 +233,24 @@
           }
         ]
       },
+      // env.staging.migrations is append-only: Cloudflare rejects any
+      // deploy whose tag list is a subset of what's already applied to
+      // the env. staging carries the full sequence (v1, v2-drop-sandbox,
+      // v3-restore-sandbox); top-level `migrations` stays at v1 until
+      // prod runs its own rollback. See RELEASES.md § Cross-migration
+      // rollback rehearsal for the procedure that produces a new tag.
       "migrations": [
         {
           "tag": "v1",
           "new_sqlite_classes": ["Sandbox"]
+        },
+        {
+          "tag": "v2-drop-sandbox",
+          "deleted_classes": ["Sandbox"]
+        },
+        {
+          "tag": "v3-restore-sandbox",
+          "new_sqlite_classes": ["Sandbox"]
         }
       ],
       "r2_buckets": [
@@ -204,8 +264,35 @@
           "name": "SCORE_LIMITER",
           "namespace_id": "1002",
           "simple": { "limit": 10, "period": 60 }
+        },
+        {
+          "name": "SCORE_LIMITER_IP",
+          "namespace_id": "1004",
+          "simple": { "limit": 30, "period": 60 }
         }
-      ]
+      ],
+      "kv_namespaces": [
+        {
+          "binding": "SCORE_KV",
+          "id": "10a0fb8031864421a3ba358e28189317"
+        }
+      ],
+      "analytics_engine_datasets": [
+        {
+          "binding": "SCORE_TELEMETRY",
+          "dataset": "anc_live_score_staging"
+        }
+      ],
+      // Cloudflare's "always passes" test sitekey (public, documented at
+      // https://developers.cloudflare.com/turnstile/troubleshooting/testing/).
+      // Pairs with the corresponding always-passes test SECRET wired into
+      // `wrangler secret put TURNSTILE_SECRET --env staging` so staging
+      // verification accepts any token without minting real bot-defense
+      // signal. Production sitekey lives at the top-level (deferred to
+      // production promotion); never inherit this staging value into prod.
+      "vars": {
+        "TURNSTILE_SITEKEY": "1x00000000000000000000AA"
+      }
     }
   }
   // Smart Placement intentionally NOT set: this is a static-asset + CN