diff --git a/.dockerignore b/.dockerignore
index a71ba86..ce609c5 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -8,6 +8,7 @@
# Files COPYed by docker/score/Dockerfile (anything Dockerfile needs to see).
!registry.yaml
!docker/score/Dockerfile
-!docker/score/anc
+!docker/score/inject
+!docker/score/inject/anc
!docker/score/install-tools.sh
!docker/score/score-anc100.sh
diff --git a/.github/ISSUE_TEMPLATE/00-blank.yml b/.github/ISSUE_TEMPLATE/00-blank.yml
new file mode 100644
index 0000000..8afdd98
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/00-blank.yml
@@ -0,0 +1,35 @@
+name: "Blank issue"
+description: "Open an issue that doesn't fit any of the structured templates below."
+body:
+ - type: textarea
+ id: body
+ attributes:
+ label: Issue
+ description: "Describe what's going on. Include the affected URL (e.g., anc.dev/scorecards), what you observed vs expected, and any browser / device context where relevant."
+ validations:
+ required: true
+ - type: textarea
+ id: ai-disclosure
+ attributes:
+ label: AI disclosure
+ description: "One sentence: what part of this issue was AI-written, what was human-written?"
+ placeholder: "Entirely human-written."
+ validations:
+ required: true
+ - type: textarea
+ id: agent-instructions
+ attributes:
+ label: For agents
+ description: "Agents: expand this section before filing."
+ value: |
+
+ Agent filing instructions
+
+ 1. **Pick a structured template first.** Site bugs have a dedicated form — use it when it fits.
+ 2. **Search first.** Run `gh search issues --repo brettdavies/agentnative-site "<keywords>"` to check for duplicates.
+ 3. **AI disclosure is required.** Fill the field above honestly.
+ 4. **Wrong repo?** Spec questions, principle edits, and grading findings live on [brettdavies/agentnative](https://github.com/brettdavies/agentnative). `anc` checker bugs and tool-registry submissions live on [brettdavies/agentnative-cli](https://github.com/brettdavies/agentnative-cli). Skill bundle issues live on [brettdavies/agentnative-skill](https://github.com/brettdavies/agentnative-skill).
+ 5. See [CONTRIBUTING.md](https://github.com/brettdavies/agentnative-site/blob/main/CONTRIBUTING.md) for full guidelines.
+
+ validations:
+ required: false
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index 7a6c1b2..823600e 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,8 +1,11 @@
blank_issues_enabled: false
contact_links:
- - name: "Spec questions, principle edits, or CLI grading"
+ - name: "Spec questions, principle edits, or grading findings"
url: "https://github.com/brettdavies/agentnative/issues/new/choose"
- about: "For anything about the standard itself — propose changes, grade a CLI, ask questions — file on the spec repo."
- - name: "Checker bugs (false positives/negatives)"
+ about: "For anything about the standard itself — propose changes, submit a grading finding, ask questions — file on the spec repo."
+ - name: "Checker bugs, features, or tool-registry submissions"
url: "https://github.com/brettdavies/agentnative-cli/issues/new/choose"
- about: "For bugs in the `anc` checker itself, file on the tool repo."
+ about: "For bugs in the `anc` checker, feature requests, or proposing a tool for the leaderboard, file on the CLI repo."
+ - name: "Skill bundle issues (bundle content, install paths, host runtimes)"
+ url: "https://github.com/brettdavies/agentnative-skill/issues/new/choose"
+ about: "For bugs or proposals about the agent-facing skill bundle, file on the skill repo."
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
index fdead33..6d72a69 100644
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -6,7 +6,7 @@
agentHosts
```
-## Upstream — data flowing INTO this repo
+## Upstream: data flowing INTO this repo
-| Source | Mechanism | What's synced | Trigger / cadence | Drift check |
-| ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `brettdavies/agentnative-cli` `coverage/matrix.json` | `scripts/sync-coverage-matrix.sh` (manual `cp` from `$ANC_ROOT/coverage/matrix.json`) | → `src/data/coverage-matrix.json` | After CLI bumps the matrix (new checks, registry changes) | CLI's CI enforces `anc generate coverage-matrix --check` against the committed CLI artifact. Site trusts the synced copy; no site-side `--check` mode. Resync is manual; `git diff` after sync is the review surface. |
-| `brettdavies/agentnative` (spec) `principles/p*-*.md` + `VERSION` + `CHANGELOG.md` | `scripts/sync-spec.sh` (manual; remote-first via `SPEC_REMOTE_URL`, falls back to local `SPEC_ROOT`; auto-picks latest v* tag; extracts via `git show "$tag:" >dest`) | → `src/data/spec/{VERSION,CHANGELOG.md,principles/p*-*.md}` (`principles/AGENTS.md` filtered out — spec-side internal) | After a spec release. Spec's `repository_dispatch:spec-release` already fires here on tag publish. | None automated on this side (consumer-side handler that auto-PRs the resync is tracked as follow-up). Spec repo's `scripts/hooks/pre-push` enforces source-side correctness. `git diff src/data/spec/` after sync is the review surface. `src/data/spec/README.md` documents the workflow. |
-| `brettdavies/agentnative` (spec) prose-check tooling: `BRAND.md`, `styles/brand/*.yml` + `README.md`, `styles/config/vocabularies/brand/{accept,reject}.txt`, `scripts/generate-pack-readme.mjs` | `scripts/sync-prose-tooling.sh` (manual; remote-first / local-fallback like `sync-spec.sh`, but tracks `main` HEAD instead of the latest v* tag because prose tooling is not contract; extracts via `git show "main:" >dest`) | → repo-rooted: `BRAND.md`, `styles/brand/`, `styles/config/vocabularies/brand/`, `scripts/generate-pack-readme.mjs` | After spec's `main` advances with changes touching the prose-check stack. Separate sync clock from `sync-spec.sh` because prose tooling and the principles/contract release on different cadences and the tooling has no release ceremony. | None automated on this side. Sync-script atomicity is the integrity guarantee: brand `*.yml` AND its `README.md` come from the same `main` HEAD SHA, so no downstream regeneration / drift surface. `git diff` after sync is the review surface. Idempotent at a fixed `main` HEAD SHA: re-running produces no diff until upstream `main` moves. **Consumer-owned (un-vendored 2026-05-13):** `scripts/prose-check.sh` is no longer vendored by this script — the upstream copy kept clobbering the SITE-LOCAL DIVERGENCE block (consumer-specific path exclusions and LT denylist additions). Universal pipeline changes (new check stage, LT URL change, severity routing) now require coordinated PRs across all four channel repos (spec / site / cli / skill). Long-term fix is the sidecar-config migration tracked at `agentnative-spec/.context/compound-engineering/todos/`; once shipped, vendoring can resume with universal logic vendored and consumer config in a sidecar file. See `scripts/prose-check.sh`'s CONSUMER-OWNED header for context. |
-| `docker/score/` image — pre-installs the full ANC 100 toolset (`anc` + 96 scored binaries) inside a reproducible Ubuntu container; iterates `registry.yaml` and runs `anc check --command [--audit-profile ] --output json` for each | `bash docker/score/build.sh --run` (builds `anc` from local cli checkout, builds image, runs `score-anc100.sh` inside container with bind-mounted `scorecards/` + `out/` dirs) | → `scorecards/-v.json` (96 files) + `docker/score/out/score-failures.txt` for any install/score failures | After a new `anc` release, after registry changes, or to refresh the full leaderboard | Build-time schema 0.5 invariant validation in `src/build/scorecards.mjs`; auto-discovery picks the highest-versioned scorecard per slug, silently superseding stale ones. Filename's `-v` suffix is the version anchor (registry no longer carries `version:` per entry post-U4). The container is the source of truth — host-side ad-hoc scoring is deprecated. |
+| Source | Mechanism | What's synced | Trigger / cadence | Drift check |
+| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `brettdavies/agentnative-cli` `coverage/matrix.json` | `scripts/sync-coverage-matrix.sh` (manual `cp` from `$ANC_ROOT/coverage/matrix.json`) | → `src/data/coverage-matrix.json` | After CLI bumps the matrix (new checks, registry changes) | CLI's CI enforces `anc generate coverage-matrix --check` against the committed CLI artifact. Site trusts the synced copy; no site-side `--check` mode. Resync is manual; `git diff` after sync is the review surface. |
+| `brettdavies/agentnative` (spec) `principles/p*-*.md` + `VERSION` + `CHANGELOG.md` | `scripts/sync-spec.sh` (manual; remote-first via `SPEC_REMOTE_URL`, falls back to local `SPEC_ROOT`; auto-picks latest v* tag; extracts via `git show "$tag:" >dest`) | → `src/data/spec/{VERSION,CHANGELOG.md,principles/p*-*.md}` (`principles/AGENTS.md` filtered out, spec-side internal) | After a spec release. Spec's `repository_dispatch:spec-release` already fires here on tag publish. | None automated on this side (consumer-side handler that auto-PRs the resync is tracked as follow-up). Spec repo's `scripts/hooks/pre-push` enforces source-side correctness. `git diff src/data/spec/` after sync is the review surface. `src/data/spec/README.md` documents the workflow. |
+| `brettdavies/agentnative` (spec) prose-check tooling: `BRAND.md`, `styles/brand/*.yml` + `README.md`, `styles/config/vocabularies/brand/{accept,reject}.txt`, `scripts/generate-pack-readme.mjs` | `scripts/sync-prose-tooling.sh` (manual; remote-first / local-fallback like `sync-spec.sh`, but tracks `main` HEAD instead of the latest v* tag because prose tooling is not contract; extracts via `git show "main:" >dest`) | → repo-rooted: `BRAND.md`, `styles/brand/`, `styles/config/vocabularies/brand/`, `scripts/generate-pack-readme.mjs` | After spec's `main` advances with changes touching the prose-check stack. Separate sync clock from `sync-spec.sh` because prose tooling and the principles/contract release on different cadences and the tooling has no release ceremony. | None automated on this side. Sync-script atomicity is the integrity guarantee: brand `*.yml` AND its `README.md` come from the same `main` HEAD SHA, so no downstream regeneration / drift surface. `git diff` after sync is the review surface. Idempotent at a fixed `main` HEAD SHA: re-running produces no diff until upstream `main` moves. **Consumer-owned (un-vendored 2026-05-13):** `scripts/prose-check.sh` is no longer vendored by this script because the upstream copy kept clobbering the SITE-LOCAL DIVERGENCE block (consumer-specific path exclusions and LT denylist additions). Universal pipeline changes (new check stage, LT URL change, severity routing) now require coordinated PRs across all four channel repos (spec / site / cli / skill). Long-term fix is the sidecar-config migration tracked at `agentnative-spec/.context/compound-engineering/todos/`; once shipped, vendoring can resume with universal logic vendored and consumer config in a sidecar file. See `scripts/prose-check.sh`'s CONSUMER-OWNED header for context. |
+| `docker/score/` image: pre-installs the full ANC 100 toolset (`anc` + 96 scored binaries) inside a reproducible Ubuntu container; iterates `registry.yaml` and runs `anc check --command [--audit-profile ] --output json` for each | `bash docker/score/build.sh --run` (default: brew-installs the latest `anc` from `brettdavies/tap/agentnative`; with `--from-source ` cargo-builds anc on the host and injects the binary into the image instead, bypassing brew) | → `scorecards/-v.json` (96 files) + `docker/score/out/score-failures.txt` for any install/score failures | After a new `anc` release, after registry changes, or to refresh the full leaderboard. Inject mode is also the way to score against an unreleased anc (feature branch in agentnative-cli before tag + bottle). | Build-time schema 0.5 invariant validation in `src/build/scorecards.mjs`; auto-discovery picks the highest-versioned scorecard per slug, silently superseding stale ones. Filename's `-v` suffix is the version anchor (registry no longer carries `version:` per entry post-U4). The container is the source of truth; host-side ad-hoc scoring is deprecated. |
### How spec version flows into rendering
### How spec versions flow into rendering surfaces
-The site shows version labels in three places. **Each pulls from a different source by design** — the three sources move
-at different cadences (vendoring, scoring, manual reconciliation), and conflating them into one would lie about at least
-one of those movements.
+The site shows version labels in three places. **Each pulls from a different source by design** because the three
+sources move at different cadences (vendoring, scoring, manual reconciliation), and conflating them into one would lie
+about at least one of those movements.
```mermaid
flowchart LR
@@ -80,20 +80,20 @@ flowchart LR
util -. "SPEC_VERSION (reference only)" .-> diff
```
-| Surface | Source | Bumped by |
-| --------------- | -------------------------------------------------- | --------------------------------------------------------------------------------------------------------- |
-| Footer | `SITE_SPEC_VERSION` ← `content/principles/VERSION` | Manual, by the contributor who reconciles `content/principles/p*-*.md` after a `sync-spec.sh` run. |
-| Per-tool badges | Each scorecard's `spec_version` field | Automatic — bumps when the scorecard is regenerated against a newer `anc` build (via `docker/score/`). |
-| OG card | `anc`'s self-scorecard's `spec_version` | Automatic on `bun run og` after `anc`'s scorecard is refreshed. |
-| (no surface) | `SPEC_VERSION` ← `src/data/spec/VERSION` | Automatic — `./scripts/sync-spec.sh` overwrites whenever the spec ships a new tag. Reference / diff only. |
+| Surface | Source | Bumped by |
+| --------------- | -------------------------------------------------- | -------------------------------------------------------------------------------------------------------- |
+| Footer | `SITE_SPEC_VERSION` ← `content/principles/VERSION` | Manual, by the contributor who reconciles `content/principles/p*-*.md` after a `sync-spec.sh` run. |
+| Per-tool badges | Each scorecard's `spec_version` field | Automatic; bumps when the scorecard is regenerated against a newer `anc` build (via `docker/score/`). |
+| OG card | `anc`'s self-scorecard's `spec_version` | Automatic on `bun run og` after `anc`'s scorecard is refreshed. |
+| (no surface) | `SPEC_VERSION` ← `src/data/spec/VERSION` | Automatic; `./scripts/sync-spec.sh` overwrites whenever the spec ships a new tag. Reference / diff only. |
Why three sources, not one: vendoring (we got a snapshot), scoring (anc was compiled against this spec), and site
reconciliation (the prose has been updated to match) are three independent events. Conflating them into one constant
forces at least one surface to lie about its actual currency. Full rationale in `src/data/spec/README.md` and the
cross-repo version-model doc at `docs/solutions/best-practices/agentnative-version-model-2026-05-01.md`. There is no
-site-own version (`package.json` is `"0.0.0"` deliberately — the spec version IS the site's "version" by intent).
+site-own version (`package.json` is `"0.0.0"` deliberately: the spec version IS the site's "version" by intent).
-## Downstream — data flowing OUT of this repo
+## Downstream: data flowing OUT of this repo
### Build-time vendoring by other repos
@@ -103,9 +103,9 @@ site-own version (`package.json` is `"0.0.0"` deliberately — the spec version
### Deploy-time emission to Cloudflare Workers
-| Surface | Mechanism | What's emitted | Trigger / cadence | Drift check |
-| ------------------------------ | ----------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `anc.dev` (Cloudflare Workers) | `wrangler deploy` invoked by `.github/workflows/deploy.yml` | `dist/` — HTML pages, CSS, JS, 107 per-tool scorecard HTML pages + markdown twins, 96 badge SVGs, OG image, fonts, `skill.{json,html,md}`, `install.{html,md}` (no `install.json` — see DESIGN §3.10), llms.txt, sitemap.xml | Push to `dev` (staging Worker `agentnative-site-staging`) or `main` (production `anc.dev`); `paths-ignore: docs/**, *.md` skips deploy on planning-only commits | None automated — production canary is by hand. The pre-deploy CI pipeline (`ci.yml`) gates on `bun install → lint → build → test → wrangler --dry-run`. |
+| Surface | Mechanism | What's emitted | Trigger / cadence | Drift check |
+| ------------------------------ | ----------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `anc.dev` (Cloudflare Workers) | `wrangler deploy` invoked by `.github/workflows/deploy.yml` | `dist/`: HTML pages, CSS, JS, 107 per-tool scorecard HTML pages + markdown twins, 96 badge SVGs, OG image, fonts, `skill.{json,html,md}`, `install.{html,md}` (no `install.json`; see DESIGN §3.10), llms.txt, sitemap.xml | Push to `dev` (staging Worker `agentnative-site-staging`) or `main` (production `anc.dev`); `paths-ignore: docs/**, *.md` skips deploy on planning-only commits | None automated; production canary is by hand. The pre-deploy CI pipeline (`ci.yml`) gates on `bun install → lint → build → test → wrangler --dry-run`. |
## Release / sync orchestration
@@ -116,8 +116,8 @@ The flows interact, but each is independently triggered:
this repo trusts the bytes.
2. **A scored tool ships a new version** (or `anc` itself does) → maintainer runs `bash docker/score/build.sh --run`
- from the repo root → `docker/score/build.sh` rebuilds the `anc` binary from the local `agentnative-cli` checkout,
- bakes it into the image, and runs `score-anc100.sh` against the full registry inside the container; bind-mounts write
+ from the repo root → `docker/score/build.sh` brew-installs the latest `anc` from `brettdavies/tap/agentnative` inside
+ the image, bakes it in, and runs `score-anc100.sh` against the full registry inside the container; bind-mounts write
the new `scorecards/<slug>-v<version>.json` files back to the host. Old per-tool files are silently superseded by
auto-discovery → next build refreshes the badge SVG and `/score/<slug>` page. The container is the source of truth
for scoring; host-side ad-hoc scoring (the prior `regen-scorecards.sh` flow) is deprecated.
@@ -126,13 +126,13 @@ The flows interact, but each is independently triggered:
tag from the spec remote) → vendored `src/data/spec/{VERSION,CHANGELOG.md,principles/p*-*.md}` updates → next site
build picks up the new `SPEC_VERSION` automatically (reference / diff only; no rendered surface reads it, per the
table above). Site contributor reviews `git diff src/data/spec/principles/` and decides whether to manually reconcile any
- prose changes into `content/principles/p*-*.md` (the two file shapes are intentionally different — see
+ prose changes into `content/principles/p*-*.md` (the two file shapes are intentionally different; see
`src/data/spec/README.md` for the workflow). Spec's `repository_dispatch:spec-release` event already fires here on
tag publish; a consumer-side handler that auto-PRs the resync is tracked as follow-up work.
4. **Spec's `main` advances with prose-tooling changes** → maintainer runs `bash scripts/sync-prose-tooling.sh` (same
remote-first / local-fallback resolution as `sync-spec.sh`, but tracks `main` HEAD instead of the latest v* tag
- because prose tooling is not contract — it's tooling, faster cadence, no release ceremony) → vendored `BRAND.md`,
+ because prose tooling is not contract: it's tooling, faster cadence, no release ceremony) → vendored `BRAND.md`,
`styles/brand/`, `styles/config/vocabularies/brand/`, and `scripts/generate-pack-readme.mjs` update in place.
`scripts/prose-check.sh` is consumer-owned (un-vendored 2026-05-13) and is no longer touched by this sync; universal
pipeline changes there require coordinated PRs across all four channel repos. Separate sync clock from item 3 because
@@ -144,7 +144,7 @@ The flows interact, but each is independently triggered:
manifest fields changed (per-host install commands, version, description), edits this repo's `src/data/skill.json` to
bump `version` plus the changed fields → PR to `dev` → release flow to `main` → `wrangler deploy` updates
`/skill.json` on `anc.dev` → Cloudflare cache purge → CLI's next PR exercises `skill-fixture-drift` against the new
- fixture. If the release didn't change any manifest fields, skip the manifest bump entirely — installed users learn
+ fixture. If the release didn't change any manifest fields, skip the manifest bump entirely; installed users learn
about the new release via the skill bundle's `bin/check-update`, not via a manifest change here. Full runbook in
`RELEASES.md` §"Skill-release procedure".
@@ -153,28 +153,28 @@ The flows interact, but each is independently triggered:
## Reference
-- `scripts/sync-coverage-matrix.sh` — header comment for usage and `ANC_ROOT` env var.
-- `scripts/sync-spec.sh` — header comment for usage, `SPEC_REMOTE_URL` / `SPEC_ROOT` env vars, and the
+- `scripts/sync-coverage-matrix.sh`: header comment for usage and `ANC_ROOT` env var.
+- `scripts/sync-spec.sh`: header comment for usage, `SPEC_REMOTE_URL` / `SPEC_ROOT` env vars, and the
remote-first-with-local-fallback resolution flow.
-- `scripts/sync-prose-tooling.sh` — header comment for the prose-check vendor manifest and rationale (separate sync
- clock from `sync-spec.sh`; tracks `main` HEAD instead of v* tags because tooling is not contract; brand README is a
- released artifact, not regenerated downstream). Shares `SPEC_REMOTE_URL` / `SPEC_ROOT` env vars with `sync-spec.sh`.
- Note: `scripts/prose-check.sh` is consumer-owned (un-vendored 2026-05-13) and intentionally NOT in the manifest; see
- that file's CONSUMER-OWNED header for context.
-- `docker/score/README.md` + `docker/score/build.sh` — the canonical scoring pipeline. `build.sh --run` builds the image
+- `scripts/sync-prose-tooling.sh`: header comment for the prose-check vendor manifest and rationale (separate sync clock
+ from `sync-spec.sh`; tracks `main` HEAD instead of v* tags because tooling is not contract; brand README is a released
+ artifact, not regenerated downstream). Shares `SPEC_REMOTE_URL` / `SPEC_ROOT` env vars with `sync-spec.sh`. Note:
+ `scripts/prose-check.sh` is consumer-owned (un-vendored 2026-05-13) and intentionally NOT in the manifest; see that
+ file's CONSUMER-OWNED header for context.
+- `docker/score/README.md` + `docker/score/build.sh`: the canonical scoring pipeline. `build.sh --run` builds the image
and runs `score-anc100.sh` inside the container, writing scorecards back to the host via bind mount. The container is
the single source of truth for scoring; host-side `regen-scorecards.sh` is deprecated.
-- `src/data/spec/README.md` — what's vendored, why, and the manual reconciliation workflow when spec prose drifts.
-- `RELEASES.md` §"Skill releases" — the downstream manifest-bump procedure for `src/data/skill.json` end-to-end
- (manifest edit → cache-purge → live verify).
+- `src/data/spec/README.md`: what's vendored, why, and the manual reconciliation workflow when spec prose drifts.
+- `RELEASES.md` §"Skill releases": the downstream manifest-bump procedure for `src/data/skill.json` end-to-end (manifest
+ edit → cache-purge → live verify).
- `docs/DESIGN.md` §3.9 (`/skill` + `/skill.json` build contract) and §3.10 (`/install` HTML-only contract).
-- `AGENTS.md` — repo conventions and the `content/principles/` vs `src/data/spec/principles/` separation rule.
-- `docs/plans/2026-04-23-001-feat-sync-spec-plan.md` (dev branch only, gated off main) — the plan that introduced
+- `AGENTS.md`: repo conventions and the `content/principles/` vs `src/data/spec/principles/` separation rule.
+- `docs/plans/2026-04-23-001-feat-sync-spec-plan.md` (dev branch only, gated off main): the plan that introduced
`sync-spec.sh` + vendored `src/data/spec/` + the SPEC_VERSION wiring.
-- `docs/solutions/best-practices/agentnative-version-model-2026-05-01.md` — cross-repo version model: what version means
+- `docs/solutions/best-practices/agentnative-version-model-2026-05-01.md`: cross-repo version model. What version means
in each of the four agentnative repos, why the site has no own version, where each version is read or displayed.
-- `docs/solutions/best-practices/cross-repo-artifact-consumption-static-sites-2026-04-21.md` — governing pattern
+- `docs/solutions/best-practices/cross-repo-artifact-consumption-static-sites-2026-04-21.md`: governing pattern
(commit-a-copy over build-time fetch over symlinks).
- CLI's reference implementation of `sync-spec.sh`: `~/dev/agentnative-cli/scripts/sync-spec.sh`.
-- CLI's `scripts/sync-skill-fixture.sh` and `skill-fixture-drift` workflow — the inverse-direction drift gate that
+- CLI's `scripts/sync-skill-fixture.sh` and `skill-fixture-drift` workflow: the inverse-direction drift gate that
protects the `src/data/skill.json` → CLI fixture flow.
diff --git a/scripts/cf-access-bootstrap.sh b/scripts/cf-access-bootstrap.sh
new file mode 100755
index 0000000..b43f4e6
--- /dev/null
+++ b/scripts/cf-access-bootstrap.sh
@@ -0,0 +1,288 @@
+#!/usr/bin/env bash
+# cf-access-bootstrap.sh — idempotent Cloudflare Access setup for the staging Worker.
+#
+# What this script does, each step skipped if already present:
+#
+# 1. Creates the Self-Hosted Access application for the staging Worker URL.
+# 2. Creates a CLI service token, capturing its client_id + client_secret
+# into 1Password (the secret is shown ONCE by Cloudflare).
+# 3. Creates two policies on the app:
+# a. "Allow brett email" — decision allow, includes a specific email.
+# b. "Allow CLI service token" — decision non_identity, includes the
+# service token id from step 2.
+# 4. Verifies the boundary works: unauth request to the protected URL must
+# return a 302 redirect to *.cloudflareaccess.com; authed request with
+# the service token headers must return 200.
+#
+# Resources are matched by NAME (not ID), so the script is safe to re-run.
+# If everything is already in place, every step reports "exists, skipping".
+#
+# Disaster recovery: if the CF account is restored from backup or the
+# Access app is deleted, re-running this script reconstructs the staging
+# auth surface from 1Password-resident credentials. The 1Password item
+# `Cloudflare API Token - Access Setup (agentnative-site)` is the only
+# operator-side prerequisite.
+#
+# Inputs (env vars; defaults below):
+#
+# CF_ACCOUNT_ID Cloudflare account ID. REQUIRED.
+# APP_NAME Access app name (default: "agentnative-site staging")
+# APP_DOMAIN Protected URL (default: agentnative-site-staging.brettdavies.workers.dev)
+# APP_SESSION session_duration (default: 2160h, 90 days)
+# IDENTITY_EMAIL Email allowed by the identity policy (default: davies.brett@gmail.com)
+# SERVICE_TOKEN_NAME Service token name (default: agentnative-site-staging-cli)
+# SERVICE_TOKEN_DURATION CF duration string (default: 8760h, 1 year — the CF max non-forever)
+# OP_ITEM_API_TOKEN 1Password title for the setup API token
+# (default: "Cloudflare API Token - Access Setup (agentnative-site)")
+# OP_ITEM_SERVICE_TOKEN 1Password title for the service token credentials
+# (default: "Cloudflare Access Service Token - agentnative-site-staging")
+#
+# Dependencies: curl, jaq (preferred) or jq, mktemp, shred, GNU date (`date -d`),
+# and the op CLI via the ~/.claude/skills/1password/scripts/ helpers.
+
+set -u
+
+# ---------------------------------------------------------------------------
+# Inputs
+# ---------------------------------------------------------------------------
+
+CF_ACCOUNT_ID="${CF_ACCOUNT_ID:-}"
+APP_NAME="${APP_NAME:-agentnative-site staging}"
+APP_DOMAIN="${APP_DOMAIN:-agentnative-site-staging.brettdavies.workers.dev}"
+APP_SESSION="${APP_SESSION:-2160h}"
+IDENTITY_EMAIL="${IDENTITY_EMAIL:-davies.brett@gmail.com}"
+SERVICE_TOKEN_NAME="${SERVICE_TOKEN_NAME:-agentnative-site-staging-cli}"
+SERVICE_TOKEN_DURATION="${SERVICE_TOKEN_DURATION:-8760h}"
+OP_ITEM_API_TOKEN="${OP_ITEM_API_TOKEN:-Cloudflare API Token - Access Setup (agentnative-site)}"
+OP_ITEM_SERVICE_TOKEN="${OP_ITEM_SERVICE_TOKEN:-Cloudflare Access Service Token - agentnative-site-staging}"
+
+OP_READ="${OP_READ:-$HOME/.claude/skills/1password/scripts/read_field.sh}"
+OP_CREATE="${OP_CREATE:-$HOME/.claude/skills/1password/scripts/create_item.sh}"
+
+JQ_BIN="$(command -v jaq || command -v jq || true)"
+
+# ---------------------------------------------------------------------------
+# Sanity checks
+# ---------------------------------------------------------------------------
+
+die() {
+ printf 'FATAL: %s\n' "$1" >&2
+ exit 2
+}
+
+[ -n "$CF_ACCOUNT_ID" ] || die "CF_ACCOUNT_ID env var is required (32-char hex)."
+[ -n "$JQ_BIN" ] || die "neither jaq nor jq installed; install one (brew install jaq) and retry."
+[ -x "$OP_READ" ] || die "1Password read helper not found at $OP_READ; install the 1password skill or export OP_READ."
+[ -x "$OP_CREATE" ] || die "1Password create helper not found at $OP_CREATE."
+
+API_TOKEN="$("$OP_READ" "$OP_ITEM_API_TOKEN" credential 2>/dev/null || true)"
+[ -n "$API_TOKEN" ] || die "could not read API token from 1Password item '$OP_ITEM_API_TOKEN'. Verify the item exists with a field named 'credential'."
+
+API_BASE="https://api.cloudflare.com/client/v4/accounts/$CF_ACCOUNT_ID"
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+# cf_get PATH
+# GET "$API_BASE" + PATH with the bearer token from $API_TOKEN; curl writes the
+# raw response body to stdout.
+cf_get() {
+  local endpoint="$1"
+  curl --silent --header "Authorization: Bearer $API_TOKEN" "${API_BASE}${endpoint}"
+}
+
+# cf_post PATH BODY
+cf_post() {
+ curl -s -X POST -H "Authorization: Bearer $API_TOKEN" -H "Content-Type: application/json" \
+ "$API_BASE$1" --data "$2"
+}
+
+# row LABEL VALUE
+# Print one report line: LABEL left-justified in a 30-character column, then VALUE.
+row() {
+  local label="$1" value="$2"
+  printf ' %-30s %s\n' "$label" "$value"
+}
+
+# ---------------------------------------------------------------------------
+# Token sanity probe
+# ---------------------------------------------------------------------------
+
+printf '\n=== cf-access-bootstrap @ %s ===\n' "$APP_DOMAIN"
+printf ' account_id=%s\n' "$CF_ACCOUNT_ID"
+printf ' app_name=%s\n' "$APP_NAME"
+printf ' session_duration=%s\n\n' "$APP_SESSION"
+
+probe="$(cf_get "/access/apps")"
+probe_success="$("$JQ_BIN" -r '.success' <<<"$probe")"
+if [ "$probe_success" != "true" ]; then
+ die "API token sanity check failed: $(echo "$probe" | "$JQ_BIN" -c '.errors')
+ Verify the token has 'Access: Apps and Policies Write' AND 'Access: Service Tokens Write' permissions."
+fi
+
+# ---------------------------------------------------------------------------
+# Step 1: Access application
+# ---------------------------------------------------------------------------
+
+printf '[1] Access application\n'
+APP_ID="$(echo "$probe" | "$JQ_BIN" -r --arg name "$APP_NAME" '.result[] | select(.name == $name) | .id' | head -1)"
+
+if [ -n "$APP_ID" ] && [ "$APP_ID" != "null" ]; then
+ row "status" "exists, skipping creation"
+ row "app_id" "$APP_ID"
+ AUD="$(echo "$probe" | "$JQ_BIN" -r --arg name "$APP_NAME" '.result[] | select(.name == $name) | .aud' | head -1)"
+ CURRENT_SESSION="$(echo "$probe" | "$JQ_BIN" -r --arg name "$APP_NAME" '.result[] | select(.name == $name) | .session_duration' | head -1)"
+ row "aud" "$AUD"
+ row "session_duration" "$CURRENT_SESSION"
+ if [ "$CURRENT_SESSION" != "$APP_SESSION" ]; then
+ row "session_duration drift" "current=$CURRENT_SESSION desired=$APP_SESSION (re-run with manual PUT if you want this updated)"
+ fi
+else
+ printf ' creating ...\n'
+ create_body=$(cat </dev/null 2>&1; then
+ printf ' WARNING: service token "%s" exists in CF but 1Password item "%s" is missing.\n' "$SERVICE_TOKEN_NAME" "$OP_ITEM_SERVICE_TOKEN" >&2
+ printf ' The CLI client_secret cannot be recovered. Rotate via:\n' >&2
+ printf ' curl -s -X POST -H "Authorization: Bearer \$API_TOKEN" \\\n' >&2
+ printf ' "%s/access/service_tokens/%s/rotate"\n' "$API_BASE" "$SVC_TOKEN_ID" >&2
+ printf ' Then capture the new client_secret into 1Password.\n' >&2
+ else
+ row "1password" "item '$OP_ITEM_SERVICE_TOKEN' present (client_id readable)"
+ fi
+else
+ printf ' creating ...\n'
+ resp_dir="$(mktemp -d -t cf-svc-XXXXXXXX)"
+ chmod 700 "$resp_dir"
+ create_resp="$(cf_post "/access/service_tokens" "{\"name\": \"$SERVICE_TOKEN_NAME\", \"duration\": \"$SERVICE_TOKEN_DURATION\"}")"
+ echo "$create_resp" > "$resp_dir/resp.json"
+ chmod 600 "$resp_dir/resp.json"
+ create_success="$("$JQ_BIN" -r '.success' "$resp_dir/resp.json")"
+ if [ "$create_success" != "true" ]; then
+ err="$("$JQ_BIN" -c '.errors' "$resp_dir/resp.json")"
+ shred -uz "$resp_dir/resp.json" && rmdir "$resp_dir"
+ die "service token create failed: $err"
+ fi
+ SVC_TOKEN_ID="$("$JQ_BIN" -r '.result.id' "$resp_dir/resp.json")"
+ expires_at="$("$JQ_BIN" -r '.result.expires_at' "$resp_dir/resp.json")"
+ expires_ts="$(date -u -d "$expires_at" +%s)"
+
+ printf ' ingesting to 1Password (value never echoed) ...\n'
+ notes="CF Access service token for the $APP_NAME Worker at $APP_DOMAIN. Auth via HTTP headers CF-Access-Client-Id and CF-Access-Client-Secret. Created $(date -u +%Y-%m-%d) by scripts/cf-access-bootstrap.sh; expires $expires_at. Rotate via the CF dashboard or POST to /access/service_tokens/$SVC_TOKEN_ID/rotate."
+ "$OP_CREATE" \
+ --title "$OP_ITEM_SERVICE_TOKEN" \
+ --tags "cloudflare,access,service-token,agentnative-site,staging" \
+ --notes "$notes" \
+ --hostname "$APP_DOMAIN" \
+ --field "username=$SERVICE_TOKEN_NAME" \
+ --field "expires=$expires_ts" \
+ --field "type=Service Token" \
+ --field "client_id=$("$JQ_BIN" -r '.result.client_id' "$resp_dir/resp.json")" \
+ --field "client_secret[concealed]=$("$JQ_BIN" -r '.result.client_secret' "$resp_dir/resp.json")" >/dev/null
+
+ shred -uz "$resp_dir/resp.json" && rmdir "$resp_dir"
+ row "status" "CREATED + ingested"
+ row "token_id" "$SVC_TOKEN_ID"
+ row "1password" "item '$OP_ITEM_SERVICE_TOKEN' created"
+fi
+
+# ---------------------------------------------------------------------------
+# Step 3: Policies
+# ---------------------------------------------------------------------------
+
+printf '\n[3] Policies\n'
+existing_policies="$(cf_get "/access/apps/$APP_ID/policies")"
+
+ensure_policy() {
+ local pname="$1" body="$2"
+ local existing_id
+ existing_id="$(echo "$existing_policies" | "$JQ_BIN" -r --arg name "$pname" '.result[] | select(.name == $name) | .id' | head -1)"
+ if [ -n "$existing_id" ] && [ "$existing_id" != "null" ]; then
+ row "$pname" "exists ($existing_id)"
+ return
+ fi
+ local resp
+ resp="$(cf_post "/access/apps/$APP_ID/policies" "$body")"
+ local ok
+ ok="$(echo "$resp" | "$JQ_BIN" -r '.success')"
+ if [ "$ok" != "true" ]; then
+ printf ' FAILED: %s\n' "$pname" >&2
+ echo "$resp" | "$JQ_BIN" -c '.errors' >&2
+ die "policy create failed (most common cause: API token missing 'Access: Apps and Policies Write' permission group)"
+ fi
+ row "$pname" "CREATED ($(echo "$resp" | "$JQ_BIN" -r '.result.id'))"
+}
+
+email_policy_body=$(cat <... target specific pack(s)
//
@@ -23,9 +23,9 @@ import yaml from "js-yaml";
const REPO_ROOT = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..");
const STYLES_DIR = path.join(REPO_ROOT, "styles");
-const DEFAULT_PACKS = ["brand", "spec"];
+const DEFAULT_PACKS = ["brand", "site"];
const TRAILER = (pack) =>
- ``;
+ ``;
function parseArgs(argv) {
const args = argv.slice(2);
diff --git a/scripts/hooks/pre-push b/scripts/hooks/pre-push
index aebd449..fc3ed56 100755
--- a/scripts/hooks/pre-push
+++ b/scripts/hooks/pre-push
@@ -20,7 +20,7 @@
# reads tokens from styles/site/BannedFonts.yml)
# 7. prose-check — bash scripts/prose-check.sh
# (Vale + LanguageTool over *.md in scope; LT skips
-# cleanly when pool is unreachable)
+# cleanly when LanguageTool is unreachable)
#
# Stages 5-7 each redirect child stdin to Banned-font deployment scan'
bash scripts/check-banned-fonts.sh prose-check (Vale + LanguageTool)'
-bash scripts/prose-check.sh pre-push checks passed'
diff --git a/scripts/prose-check.sh b/scripts/prose-check.sh
index 9888b17..007ac41 100755
--- a/scripts/prose-check.sh
+++ b/scripts/prose-check.sh
@@ -70,57 +70,34 @@
# scripts/prose-check.sh --lt-only skip Vale entirely (LT debugging)
#
# Env:
-# LANGUAGETOOL_URL LT base URL (default: http://pool.tail42ba87.ts.net:8081)
-# FQDN avoids macOS+Tailscale short-name DNS timeouts.
+# LANGUAGETOOL_URL LT base URL (default: http://languagetool:8081).
+# Consumed by lt_check (~/dotfiles/config/shell/languagetool.sh).
+# LT_DENY_RULES Extend the baseline 10-rule denylist with repo-specific
+# rule IDs. This site adds 4 by default (IN_PRINCIPAL,
+# CONTRACT_CONTACT, TO_DO_HYPHEN, PLURAL_MODIFIER); override
+# to replace, or set to "${LT_DENY_RULES_BASELINE}|EXTRA" to
+# extend further.
# PROSE_CHECK_BASE git ref to diff against in --changed-only (default: origin/dev)
set -euo pipefail
cd "$(git rev-parse --show-toplevel)"
-LT_URL_DEFAULT="http://pool.tail42ba87.ts.net:8081"
-LT_URL="${LANGUAGETOOL_URL:-$LT_URL_DEFAULT}"
+# LanguageTool wrapper: see ~/dotfiles/config/shell/languagetool.sh for the
+# baseline 10-rule denylist (LT_DENY_RULES_BASELINE), category whitelist,
+# and exit-code contract. Reachability probe and per-file POST live there.
+LT_LIB="${DOTFILES_SHELL_DIR:-$HOME/dotfiles/config/shell}/languagetool.sh"
+if [[ ! -f "$LT_LIB" ]]; then
+ echo "prose-check: required helper $LT_LIB not found (install brettdavies/dotfiles)" >&2
+ exit 2
+fi
+# shellcheck disable=SC1090
+source "$LT_LIB"
+
PROSE_CHECK_BASE="${PROSE_CHECK_BASE:-origin/dev}"
-# LT blocking whitelist — narrowed from the plan's 7-category default
-# (TYPOS|GRAMMAR|PUNCTUATION|TYPOGRAPHY|CASING|COMPOUNDING|CONFUSED_WORDS)
-# to the three categories that are reliably high-signal on markdown corpora.
-# PUNCTUATION/TYPOGRAPHY/CASING/COMPOUNDING fired ~95% noise on the spec
-# corpus from LT misreading markdown syntax (table whitespace, `->` arrows,
-# code-fence quotes); they remain on the warning tier (visible via
-# --warnings). Re-promote to blocking when LT gains markdown awareness or
-# a per-rule allowlist lands.
-LT_BLOCKING_CATEGORIES='^(TYPOS|GRAMMAR|CONFUSED_WORDS)$'
-# Per-rule denylist within the blocking categories — specific LT rule
-# IDs that misfire on RFC 2119 keyword conventions or on technical-prose
-# patterns the rule pack does not cover. Override via LT_DENY_RULES env.
-#
-# MD_BASEFORM "MUST " / "MAY " — LT does not
-# recognize RFC 2119 keywords; treats them as
-# modal-verb usage and demands base form.
-# MUST_HAVE_TO Same root cause for "must" usage.
-# HAVE_PART_AGREEMENT Misfires on "if: CLI has X" YAML-prose.
-# PREPOSITION_VERB Misfires on workflow names ("deploy / publish").
-# THIS_NNS Misfires on "all of these hold" technical claims.
-# NON_STANDARD_WORD Misfires on identifier strings inside code spans.
-# POSSESSIVE_APOSTROPHE Misfires on code-comment-style prose.
-# A_INSTALL Misfires on "an install path" / "a full reinstall"
-# — CLI-domain noun usage of install/reinstall that
-# LT's noun lexicon does not cover.
-# IS_AND_ARE Misfires on parenthetical-clause subjects, e.g.
-# "runtimes (Claude Code, Cursor, ... and others as
-# the ecosystem evolves)" — LT picks the wrong head
-# noun when a parenthetical sits between subject and
-# verb.
-# SINGULAR_NOUN_ADV_AGREEMENT
-# Same class of misfire on subordinate-clause
-# subjects, e.g. "Agents consuming JSON output still
-# receive interleaved diagnostic text" — LT parses
-# "JSON output" as the head noun and demands a
-# singular verb when the actual subject ("Agents")
-# is plural.
-#
# === SITE-LOCAL DENYLIST EXTENSIONS ====================================
-# Four additional rules that misfire on agentnative-site domain jargon:
+# Four rules atop the lt_check baseline that misfire on agentnative-site
+# domain jargon:
#
# IN_PRINCIPAL LT confuses "principle" (P1-P8 noun, the contract
# term) with "principal" (chief). Site corpus uses
@@ -143,8 +120,8 @@ LT_BLOCKING_CATEGORIES='^(TYPOS|GRAMMAR|CONFUSED_WORDS)$'
# site-corpus-correct fix; the alternative is
# rewording every doc that names a CF CLI command.
# ========================================================================
-LT_DENY_RULES_DEFAULT='^(MD_BASEFORM|MUST_HAVE_TO|HAVE_PART_AGREEMENT|PREPOSITION_VERB|THIS_NNS|NON_STANDARD_WORD|POSSESSIVE_APOSTROPHE|A_INSTALL|IS_AND_ARE|SINGULAR_NOUN_ADV_AGREEMENT|IN_PRINCIPAL|CONTRACT_CONTACT|TO_DO_HYPHEN|PLURAL_MODIFIER)$'
-LT_DENY_RULES="${LT_DENY_RULES:-$LT_DENY_RULES_DEFAULT}"
+LT_DENY_RULES="${LT_DENY_RULES:-${LT_DENY_RULES_BASELINE}|IN_PRINCIPAL|CONTRACT_CONTACT|TO_DO_HYPHEN|PLURAL_MODIFIER}"
+export LT_DENY_RULES
CHANGED_ONLY=0
SHOW_WARNINGS=0
@@ -249,47 +226,25 @@ fi
# --- LanguageTool stage ---
if (( RUN_LT )); then
- if curl --max-time 2 -fsS "$LT_URL/v2/languages" >/dev/null 2>&1; then
- LT_TMP="$(mktemp -d)"
- trap 'rm -rf "$LT_TMP" "$OUT_FILE"' EXIT
-
- printf '%s\0' "${MD_FILES[@]}" | xargs -0 -P4 -I{} bash -c '
- file="$1"; tmp="$2"; url="$3"
- out="$tmp/$(echo "$file" | tr "/" "_").json"
- curl -sS --max-time 30 -X POST "$url/v2/check" \
- --data-urlencode "language=en-US" \
- --data-urlencode "text@$file" > "$out" 2>/dev/null || true
- ' _ {} "$LT_TMP" "$LT_URL"
-
- for f in "${MD_FILES[@]}"; do
- json="$LT_TMP/$(echo "$f" | tr '/' '_').json"
- [[ -s "$json" ]] || continue
- while IFS=$'\t' read -r offset rule_id category message; do
- [[ -z "$offset" ]] && continue
- # Approximate line from byte offset (no exact column conversion at v1).
- line=$(awk -v off="$offset" 'BEGIN{cur=0} {cur+=length($0)+1; if (cur>off) {print NR; exit}}' "$f" 2>/dev/null)
- line="${line:-?}"
- if [[ "$category" =~ $LT_BLOCKING_CATEGORIES ]] && ! [[ "$rule_id" =~ $LT_DENY_RULES ]]; then
- BLOCKING=$((BLOCKING + 1))
- printf '%s:%s:LT.%s (%s): %s\n' "$f" "$line" "$rule_id" "$category" "$message" >> "$OUT_FILE"
- else
- WARNING=$((WARNING + 1))
- if (( SHOW_WARNINGS )); then
- printf '[warn] %s:%s:LT.%s (%s): %s\n' "$f" "$line" "$rule_id" "$category" "$message" >> "$OUT_FILE"
- fi
- fi
- done < <(jaq -r '.matches[]? | [.offset, .rule.id, .rule.category.id, .message] | @tsv' "$json" 2>/dev/null || true)
- done
- else
- rc=$?
- case "$rc" in
- 6) reason="couldn't resolve host (Tailscale likely off, or FQDN drift)" ;;
- 7) reason="couldn't connect (host up, LT service down)" ;;
- 28) reason="timed out (>2s; service slow or network impaired)" ;;
- *) reason="curl exit $rc" ;;
- esac
- echo "prose-check: LanguageTool unreachable at $LT_URL — $reason; skipping grammar check" >&2
- fi
+ LT_OUT="$(mktemp)"
+ trap 'rm -f "$OUT_FILE" "$LT_OUT"' EXIT
+ LT_RC=0
+ lt_check "${MD_FILES[@]}" > "$LT_OUT" || LT_RC=$?
+ case "$LT_RC" in
+ 0|1) ;; # findings (if any) are in LT_OUT
+ 2) echo "prose-check: skipping grammar check (see lt_check notice above)" >&2 ;;
+ *) echo "prose-check: lt_check returned unexpected exit $LT_RC" >&2; exit 2 ;;
+ esac
+ while IFS= read -r ln; do
+ [[ -z "$ln" ]] && continue
+ if [[ "$ln" == "[warn] "* ]]; then
+ WARNING=$((WARNING + 1))
+ (( SHOW_WARNINGS )) && printf '%s\n' "$ln" >> "$OUT_FILE"
+ else
+ BLOCKING=$((BLOCKING + 1))
+ printf '%s\n' "$ln" >> "$OUT_FILE"
+ fi
+ done < "$LT_OUT"
fi
# Print findings sorted by file then line
diff --git a/scripts/score-sandbox.py b/scripts/score-sandbox.py
new file mode 100755
index 0000000..533df2e
--- /dev/null
+++ b/scripts/score-sandbox.py
@@ -0,0 +1,420 @@
+#!/usr/bin/env -S uv run python3
+# /// script
+# requires-python = ">=3.11"
+# dependencies = [
+# "polars>=1.0",
+# ]
+# ///
+"""Score-algorithm sandbox.
+
+Loads every latest-version scorecard plus the coverage-matrix, joins per-check tier
+metadata onto each result row, and computes several candidate scoring algorithms
+side-by-side as a polars DataFrame. Emits into `.context/score-sandbox/` (gitignored
+local-only artifact dir per the repo's `.context/` convention):
+
+ .context/score-sandbox/long.parquet long-form dataframe (one row per check per tool)
+ .context/score-sandbox/tools.csv per-tool aggregate scores (one row per tool)
+ .context/score-sandbox/report.md markdown report (eligibility, distribution, leaderboard)
+
+Pure read-only against the host repo's tracked data. Does not touch the CLI or scorecards/.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+import sys
+from pathlib import Path
+
+import polars as pl
+
+REPO = Path(__file__).resolve().parent.parent
+SCORECARDS = REPO / "scorecards"
+COVERAGE = REPO / "src/data/coverage-matrix.json"
+OUT_DIR = REPO / ".context/score-sandbox"
+
+VERSION_RE = re.compile(r"^(.+)-v([0-9].*)\.json$")
+
+
+def parse_version(v: str) -> tuple[int, ...]:
+ return tuple(int(x) if x.isdigit() else 0 for x in v.split("."))
+
+
+def load_tier_lookup() -> dict[str, str]:
+ matrix = json.loads(COVERAGE.read_text())
+ lookup: dict[str, str] = {}
+ for row in matrix["rows"]:
+ for v in row.get("verifiers", []):
+ lookup[v["check_id"]] = row["level"] # must | should | may
+ lookup.setdefault("p3-version", "must")
+ return lookup
+
+
+def load_latest_scorecards() -> list[dict]:
+ """Pick the highest-versioned scorecard per slug."""
+ seen: dict[str, dict] = {}
+ for f in sorted(SCORECARDS.glob("*.json")):
+ m = VERSION_RE.match(f.name)
+ if not m:
+ continue
+ slug, version = m.group(1), m.group(2)
+ prior = seen.get(slug)
+ if prior is None or parse_version(version) > parse_version(prior["version"]):
+ data = json.loads(f.read_text())
+ seen[slug] = {"slug": slug, "version": version, "file": f.name, "data": data}
+ return sorted(seen.values(), key=lambda x: x["slug"])
+
+
+def build_long_frame(cards: list[dict], tiers: dict[str, str]) -> pl.DataFrame:
+ """One row per check per tool: slug, version, check_id, status, layer, tier."""
+ rows = []
+ for card in cards:
+ for r in card["data"]["results"]:
+ rows.append(
+ {
+ "slug": card["slug"],
+ "version": card["version"],
+ "check_id": r["id"],
+ "status": r["status"],
+ "layer": r.get("layer", ""),
+ "tier": tiers.get(r["id"], "must"),
+ }
+ )
+ return pl.DataFrame(rows)
+
+
+# ───── scoring expressions ─────────────────────────────────────────────────
+
+
+def weighted_score(
+ weights: dict[str, float],
+ *,
+ may_warn_as_skip: bool = False,
+ skip_in_denom: bool = False,
+ exec_pass: float = 1.0,
+ exec_warn: float = 0.5,
+ exec_fail: float = 0.0,
+) -> pl.Expr:
+ """Element-value: sum(base * exec) / sum(base over denom rows).
+
+ `skip_in_denom=False` (default): denominator = base over pass/warn/fail only.
+ Rewards tools whose evaluated set is mostly passes — "ratio under tier weights."
+ `skip_in_denom=True`: denominator = base over pass/warn/fail/skip.
+ True skating model: skip earns no points but its base still appears in the
+ ceiling, so a tool that didn't attempt the check pays for that absence.
+ `may_warn_as_skip`: reclassify MAY-tier `warn` to `skip` before applying the
+ skip-handling rule. Lets "MAY non-adoption shouldn't count against you"
+ compose with either denominator stance.
+ """
+ tier_w = (
+ pl.when(pl.col("tier") == "must")
+ .then(weights["must"])
+ .when(pl.col("tier") == "should")
+ .then(weights["should"])
+ .when(pl.col("tier") == "may")
+ .then(weights["may"])
+ .otherwise(1.0)
+ )
+ eff_status = (
+ pl.when((pl.col("tier") == "may") & (pl.col("status") == "warn") & may_warn_as_skip)
+ .then(pl.lit("skip"))
+ .otherwise(pl.col("status"))
+ )
+ exec_mult = (
+ pl.when(eff_status == "pass")
+ .then(exec_pass)
+ .when(eff_status == "warn")
+ .then(exec_warn)
+ .when(eff_status == "fail")
+ .then(exec_fail)
+ .otherwise(0.0) # skip → contributes 0 to numerator
+ )
+ if skip_in_denom:
+ # Denom rows: every status except `error` (probe broke; anc-side bug).
+ denom_valid = eff_status != "error"
+ else:
+ # Denom rows: only pass/warn/fail.
+ denom_valid = eff_status.is_in(["pass", "warn", "fail"])
+ num = (tier_w * exec_mult).filter(denom_valid).sum()
+ denom = tier_w.filter(denom_valid).sum()
+ return (
+ pl.when(denom > 0)
+ .then((num / denom * 100).round(0))
+ .otherwise(0)
+ .cast(pl.Int64)
+ )
+
+
+def current_score() -> pl.Expr:
+ pass_n = (pl.col("status") == "pass").sum()
+ warn_n = (pl.col("status") == "warn").sum()
+ fail_n = (pl.col("status") == "fail").sum()
+ denom = pass_n + warn_n + fail_n
+ return (
+ pl.when(denom > 0)
+ .then((pass_n / denom * 100).round(0))
+ .otherwise(0)
+ .cast(pl.Int64)
+ )
+
+
+def compliance_score() -> pl.Expr:
+ """MUST + SHOULD only. MAY excluded from headline."""
+ mask = pl.col("tier").is_in(["must", "should"])
+ pass_n = ((pl.col("status") == "pass") & mask).sum()
+ eval_n = (pl.col("status").is_in(["pass", "warn", "fail"]) & mask).sum()
+ return (
+ pl.when(eval_n > 0)
+ .then((pass_n / eval_n * 100).round(0))
+ .otherwise(0)
+ .cast(pl.Int64)
+ )
+
+
+def extras_score() -> pl.Expr:
+ """MAY adoption rate: pass / (pass + warn + fail + skip) over MAY-tier checks.
+
+ Skip counts in the denominator so the metric reflects what fraction of the
+ spec's MAY menu the tool adopts — including 'tool didn't ship this thing.'
+ """
+ mask = pl.col("tier") == "may"
+ pass_n = ((pl.col("status") == "pass") & mask).sum()
+ total_n = mask.sum()
+ return (
+ pl.when(total_n > 0)
+ .then((pass_n / total_n * 100).round(0))
+ .otherwise(0)
+ .cast(pl.Int64)
+ )
+
+
+def weighted_blend(comp_weight: float = 0.85) -> pl.Expr:
+ return (
+ (compliance_score() * comp_weight + extras_score() * (1 - comp_weight))
+ .round(0)
+ .cast(pl.Int64)
+ )
+
+
+# ───── aggregation per tool ───────────────────────────────────────────────
+
+
+def compute_tool_scores(long: pl.DataFrame) -> pl.DataFrame:
+ tier_mix = (
+ long.group_by("slug")
+ .agg(
+ (pl.col("tier") == "must").sum().alias("n_must"),
+ (pl.col("tier") == "should").sum().alias("n_should"),
+ (pl.col("tier") == "may").sum().alias("n_may"),
+ pl.col("version").first(),
+ )
+ )
+
+ scored = long.group_by("slug").agg(
+ current_score().alias("A_current"),
+ weighted_score({"must": 1, "should": 2, "may": 3}).alias("B_skating_1_2_3"),
+ weighted_score({"must": 1, "should": 2, "may": 4}).alias("C_skating_1_2_4"),
+ compliance_score().alias("D_compliance"),
+ extras_score().alias("D_extras"),
+ weighted_score({"must": 1, "should": 2, "may": 3}, may_warn_as_skip=True).alias(
+ "E_skating_may_skip"
+ ),
+ weighted_blend(0.85).alias("F_weighted_85_15"),
+ weighted_score({"must": 1, "should": 2, "may": 3}, skip_in_denom=True).alias(
+ "G_ceiling_1_2_3"
+ ),
+ weighted_score(
+ {"must": 1, "should": 2, "may": 3},
+ skip_in_denom=True,
+ may_warn_as_skip=True,
+ ).alias("H_ceiling_may_skip"),
+ )
+
+ return tier_mix.join(scored, on="slug").sort("B_skating_1_2_3", descending=True)
+
+
+# ───── reporting ──────────────────────────────────────────────────────────
+
+
+def threshold_eligibility(df: pl.DataFrame, threshold: int) -> dict[str, int]:
+ cols = [
+ "A_current", "B_skating_1_2_3", "C_skating_1_2_4", "D_compliance",
+ "E_skating_may_skip", "F_weighted_85_15", "G_ceiling_1_2_3", "H_ceiling_may_skip",
+ ]
+ out = {c: int(df.filter(pl.col(c) >= threshold).height) for c in cols}
+ out["D_both"] = int(
+ df.filter((pl.col("D_compliance") >= threshold) & (pl.col("D_extras") >= 50)).height
+ )
+ return out
+
+
+def bucket_distribution(df: pl.DataFrame, col: str) -> list[int]:
+ buckets = [(90, 100), (80, 89), (70, 79), (60, 69), (50, 59), (0, 49)]
+ return [
+ int(df.filter((pl.col(col) >= lo) & (pl.col(col) <= hi)).height)
+ for (lo, hi) in buckets
+ ]
+
+
+def add_ranks(df: pl.DataFrame) -> pl.DataFrame:
+ return df.with_columns(
+ pl.col("A_current").rank("min", descending=True).cast(pl.Int64).alias("A_rank"),
+ pl.col("B_skating_1_2_3").rank("min", descending=True).cast(pl.Int64).alias("B_rank"),
+ pl.col("F_weighted_85_15").rank("min", descending=True).cast(pl.Int64).alias("F_rank"),
+ ).with_columns(
+ (pl.col("A_rank") - pl.col("B_rank")).alias("rank_delta_A_to_B"),
+ )
+
+
+def render_markdown(df: pl.DataFrame) -> str:
+ lines: list[str] = []
+ push = lines.append
+ push("# Scoring sandbox — v0.4.0 rescore data")
+ push("")
+ push(f"Tools analyzed: {df.height}. Generated by `scripts/score-sandbox.py`.")
+ push("")
+ push("## Configurations")
+ push("")
+ push("- **A current** — `pass / (pass + warn + fail)`, skip/error excluded. Today's algorithm.")
+ push("- **B skating 1/2/3** — element-value, weights MUST=1, SHOULD=2, MAY=3; pass=1.0 warn=0.5 fail=0.0; skip/error excluded.")
+ push("- **C skating 1/2/4** — element-value, weights MUST=1, SHOULD=2, MAY=4; same execution multiplier as B.")
+ push("- **D compliance / extras** — two numbers per tool. Compliance = MUST + SHOULD ratio (skip/error excluded). Extras = MAY pass rate against the full MAY menu (skips in denominator).")
+ push("- **E skating + MAY→skip** — same weights as B, but MAY-warn results are reclassified as skip (excluded from numerator AND denominator).")
+ push("- **F weighted 85/15** — single-number blend of D: `compliance × 0.85 + extras × 0.15`.")
+ push("- **G ceiling 1/2/3** — same weights as B but `skip` is counted in the denominator (spec ceiling). True skating model: a tool that didn't attempt a check pays for the absence.")
+ push("- **H ceiling + MAY→skip** — G with MAY-warn reclassified as skip. Tests whether shifting MAY-warns to skips meaningfully changes outcomes when the denominator already counts skips.")
+ push("")
+
+ push("## Eligibility counts")
+ push("")
+ e75 = threshold_eligibility(df, 75)
+ e80 = threshold_eligibility(df, 80)
+ push("| Threshold | A | B | C | D both | D comp | E | F | G | H |")
+ push("| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: | ---: |")
+ push(f"| ≥ 75 | {e75['A_current']} | {e75['B_skating_1_2_3']} | {e75['C_skating_1_2_4']} | {e75['D_both']} | {e75['D_compliance']} | {e75['E_skating_may_skip']} | {e75['F_weighted_85_15']} | {e75['G_ceiling_1_2_3']} | {e75['H_ceiling_may_skip']} |")
+ push(f"| ≥ 80 | {e80['A_current']} | {e80['B_skating_1_2_3']} | {e80['C_skating_1_2_4']} | {e80['D_both']} | {e80['D_compliance']} | {e80['E_skating_may_skip']} | {e80['F_weighted_85_15']} | {e80['G_ceiling_1_2_3']} | {e80['H_ceiling_may_skip']} |")
+ push("")
+
+ push("## Distribution by score bucket")
+ push("")
+ cols_for_dist = [
+ ("A_current", "A"),
+ ("B_skating_1_2_3", "B"),
+ ("C_skating_1_2_4", "C"),
+ ("D_compliance", "D-comp"),
+ ("D_extras", "D-ext"),
+ ("E_skating_may_skip", "E"),
+ ("F_weighted_85_15", "F"),
+ ("G_ceiling_1_2_3", "G"),
+ ("H_ceiling_may_skip", "H"),
+ ]
+ header = "| Bucket | " + " | ".join(label for _, label in cols_for_dist) + " |"
+ push(header)
+ push("| --- | " + " | ".join("---:" for _ in cols_for_dist) + " |")
+ bucket_labels = ["90–100", "80–89", "70–79", "60–69", "50–59", "0–49"]
+ bucket_data = {col: bucket_distribution(df, col) for col, _ in cols_for_dist}
+ for i, label in enumerate(bucket_labels):
+ row = "| " + label + " | " + " | ".join(str(bucket_data[col][i]) for col, _ in cols_for_dist) + " |"
+ push(row)
+ push("")
+
+ df_ranked = add_ranks(df)
+
+ # Sort leaderboard by G (true skating ceiling model) rather than B.
+ df_ranked = df_ranked.sort("G_ceiling_1_2_3", descending=True)
+
+ push("## Per-tool leaderboard (sorted by config G — true skating ceiling)")
+ push("")
+ push("| # | Slug | Version | M/S/m | A | B | C | D comp/ext | E | F | G | H | Δ rank A→G |")
+ push("| ---: | --- | --- | :---: | ---: | ---: | ---: | :---: | ---: | ---: | ---: | ---: | :---: |")
+ df_ranked = df_ranked.with_columns(
+ pl.col("G_ceiling_1_2_3").rank("min", descending=True).cast(pl.Int64).alias("G_rank"),
+ ).with_columns(
+ (pl.col("A_rank") - pl.col("G_rank")).alias("rank_delta_A_to_G"),
+ )
+ for i, row in enumerate(df_ranked.iter_rows(named=True), start=1):
+ delta = row["rank_delta_A_to_G"]
+ arrow = f"▲{delta}" if delta > 0 else (f"▼{-delta}" if delta < 0 else "–")
+ push(
+ f"| {i} | {row['slug']} | v{row['version']} | "
+ f"{row['n_must']}/{row['n_should']}/{row['n_may']} | "
+ f"{row['A_current']} | {row['B_skating_1_2_3']} | {row['C_skating_1_2_4']} | "
+ f"{row['D_compliance']} / {row['D_extras']} | "
+ f"{row['E_skating_may_skip']} | {row['F_weighted_85_15']} | "
+ f"{row['G_ceiling_1_2_3']} | {row['H_ceiling_may_skip']} | {arrow} |"
+ )
+ push("")
+
+ push("## Biggest A→B rank movers")
+ push("")
+ movers = df_ranked.sort("rank_delta_A_to_B", descending=True)
+ push("### Climbers (rank ↑ going from A to B)")
+ push("")
+ push("| Slug | A rank | B rank | A% | B% | Δ rank |")
+ push("| --- | ---: | ---: | ---: | ---: | :---: |")
+ for row in movers.head(15).iter_rows(named=True):
+ if row["rank_delta_A_to_B"] <= 0:
+ continue
+ push(
+ f"| {row['slug']} | {row['A_rank']} | {row['B_rank']} | "
+ f"{row['A_current']} | {row['B_skating_1_2_3']} | ▲{row['rank_delta_A_to_B']} |"
+ )
+ push("")
+ push("### Fallers (rank ↓ going from A to B)")
+ push("")
+ push("| Slug | A rank | B rank | A% | B% | Δ rank |")
+ push("| --- | ---: | ---: | ---: | ---: | :---: |")
+ for row in movers.tail(15).iter_rows(named=True):
+ if row["rank_delta_A_to_B"] >= 0:
+ continue
+ push(
+ f"| {row['slug']} | {row['A_rank']} | {row['B_rank']} | "
+ f"{row['A_current']} | {row['B_skating_1_2_3']} | ▼{-row['rank_delta_A_to_B']} |"
+ )
+ push("")
+
+ return "\n".join(lines)
+
+
+def main() -> int:
+ if not COVERAGE.exists():
+ print(f"error: missing {COVERAGE}", file=sys.stderr)
+ return 1
+ tiers = load_tier_lookup()
+ cards = load_latest_scorecards()
+ if not cards:
+ print("error: no scorecards found", file=sys.stderr)
+ return 1
+ OUT_DIR.mkdir(parents=True, exist_ok=True)
+ long_path = OUT_DIR / "long.parquet"
+ tools_path = OUT_DIR / "tools.csv"
+ report_path = OUT_DIR / "report.md"
+
+ long = build_long_frame(cards, tiers)
+ long.write_parquet(long_path)
+
+ df = compute_tool_scores(long)
+ df.write_csv(tools_path)
+
+ md = render_markdown(df)
+ report_path.write_text(md)
+
+ # Echo the markdown report to stdout so a `bash` invocation captures it.
+ print(md)
+ print(
+ f"\n---\nlong-form dataframe: {long_path.relative_to(REPO)} ({long.height} rows)",
+ file=sys.stderr,
+ )
+ print(
+ f"per-tool table: {tools_path.relative_to(REPO)} ({df.height} rows)",
+ file=sys.stderr,
+ )
+ print(
+ f"markdown report: {report_path.relative_to(REPO)}",
+ file=sys.stderr,
+ )
+ return 0
+
+
+if __name__ == "__main__":
+ raise SystemExit(main())
diff --git a/scripts/smoke-api-score.sh b/scripts/smoke-api-score.sh
new file mode 100755
index 0000000..0c74306
--- /dev/null
+++ b/scripts/smoke-api-score.sh
@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+# Post-deploy smoke for the live-scoring Worker. Exits 0 when /api/score for
+# a curated slug returns the response triad; exits non-zero otherwise.
+#
+# Invoked from .github/workflows/deploy.yml after a successful wrangler
+# deploy, and runnable locally for parity. Exercises the registry-fast-path
+# only: gate behaviour and live-sandbox dispatch are covered by unit tests
+# and the opt-in homepage-score-live e2e suite. Rationale lives in
+# RELEASES-RATIONALE.md § Post-deploy smoke scope.
+#
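+# Usage:
+# scripts/smoke-api-score.sh BASE_URL
+# BASE_URL is required (first positional argument); the script appends
+# /api/score to it, so pass it without a trailing slash.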
+#
+# Environment variables (all optional):
+# CF_ACCESS_CLIENT_ID Sent as CF-Access-Client-Id when non-empty.
+# CF_ACCESS_CLIENT_SECRET Sent as CF-Access-Client-Secret when non-empty.
+# Both come from repo secrets in GH Actions; they
+# are required for staging (Worker is behind
+# Cloudflare Access) and unused for production
+# (anc.dev is public).
+# TURNSTILE_TOKEN Defaults to "x". The literal "x" succeeds only
+# against the CF always-passes test secret used
+# on staging. Production needs a real strategy.
+# SMOKE_SLEEP_SEC Edge-propagation delay before the POST.
+# Default 10. Tune up if regional latency starts
+# producing intermittent 404s.
+# SLUG Curated slug to score. Default "ripgrep".
+# Must be present in registry.yaml.
+#
+# Exit codes:
+# 0 smoke passed
+# 1 smoke failed (assertion mismatch or non-200 from /api/score)
+# 2 prerequisite missing (no base URL, no jq)
+
+set -euo pipefail
+
+# The base URL is the only positional argument; everything else is env-driven.
+BASE_URL="${1:-}"
+if [ -z "$BASE_URL" ]; then
+  echo "FATAL: missing base URL. Usage: $0 BASE_URL" >&2
+  exit 2
+fi
+
+# Prefer jaq, fall back to jq. `|| true` keeps `set -e` from aborting before
+# the friendlier message below can be printed.
+JQ_BIN="$(command -v jaq || command -v jq || true)"
+if [ -z "$JQ_BIN" ]; then
+  echo "FATAL: neither jaq nor jq is installed. Install one (brew install jaq) and retry." >&2
+  exit 2
+fi
+
+SLEEP_SEC="${SMOKE_SLEEP_SEC:-10}"
+SLUG="${SLUG:-ripgrep}"
+TURNSTILE_TOKEN="${TURNSTILE_TOKEN:-x}"
+
+# Access headers are only attached when BOTH halves of the service token are
+# present; otherwise the array stays empty and the request goes out bare.
+ACCESS_HEADERS=()
+if [ -n "${CF_ACCESS_CLIENT_ID:-}" ] && [ -n "${CF_ACCESS_CLIENT_SECRET:-}" ]; then
+  ACCESS_HEADERS+=(-H "CF-Access-Client-Id: ${CF_ACCESS_CLIENT_ID}")
+  ACCESS_HEADERS+=(-H "CF-Access-Client-Secret: ${CF_ACCESS_CLIENT_SECRET}")
+fi
+
+if [ "$SLEEP_SEC" -gt 0 ]; then
+  echo "Waiting ${SLEEP_SEC}s for edge propagation..."
+  sleep "$SLEEP_SEC"
+fi
+
+# Build the request body with jq so quotes or backslashes in SLUG /
+# TURNSTILE_TOKEN are JSON-escaped instead of corrupting the payload.
+payload="$("$JQ_BIN" -n -c --arg input "$SLUG" --arg token "$TURNSTILE_TOKEN" \
+  '{input: $input, turnstile_token: $token}')"
+
+echo "POST ${BASE_URL}/api/score (slug=${SLUG})"
+# `${arr[@]+"${arr[@]}"}` expands to nothing when the array is empty. A plain
+# "${arr[@]}" is an unbound-variable error under `set -u` on bash < 4.4
+# (including the bash 3.2 that ships with macOS).
+#
+# The assignment lives inside `if !` so a curl failure maps to the documented
+# exit code 1 (instead of leaking curl's own status through `set -e`) and the
+# error body captured by --fail-with-body is still shown.
+if ! response="$(curl --silent --show-error --fail-with-body \
+  --max-time 30 \
+  ${ACCESS_HEADERS[@]+"${ACCESS_HEADERS[@]}"} \
+  -H "Content-Type: application/json" \
+  -d "$payload" \
+  "${BASE_URL}/api/score")"; then
+  echo "FATAL: POST ${BASE_URL}/api/score failed (curl error or HTTP status >= 400)" >&2
+  if [ -n "$response" ]; then
+    printf '%s\n' "$response" >&2
+  fi
+  exit 1
+fi
+
+echo "::group::smoke response"
+# Fall back to the raw body when it is not valid JSON, so the log group is
+# always closed and the contract check below reports the failure.
+printf '%s\n' "$response" | "$JQ_BIN" . || printf '%s\n' "$response"
+echo "::endgroup::"
+
+# Contract: scorecard.kind === "registry_hit" plus the four string fields of
+# the response triad. Missing any field is a deploy-stop signal.
+if ! printf '%s\n' "$response" | "$JQ_BIN" --exit-status '
+  .scorecard.kind == "registry_hit"
+  and (.spec_version | type) == "string"
+  and (.site_spec_version | type) == "string"
+  and (.anc_version | type) == "string"
+  and (.checker_url | type) == "string"
+  ' > /dev/null; then
+  echo "FATAL: /api/score response missing required fields for ${SLUG}" >&2
+  exit 1
+fi
+
+echo "[pass] /api/score returned registry_hit with full response triad"
diff --git a/scripts/staging-cache-smoke.sh b/scripts/staging-cache-smoke.sh
new file mode 100755
index 0000000..27ab3be
--- /dev/null
+++ b/scripts/staging-cache-smoke.sh
@@ -0,0 +1,394 @@
+#!/usr/bin/env bash
+# staging-cache-smoke.sh — opt-in live cache smoke test for /api/score on staging.
+#
+# Plan U7 verification. NOT in the default test pipeline (bun test). Run on
+# demand when you need confidence that the live staging cache tier is
+# behaving as designed, or after any change to handler.ts / cache.ts / do.ts
+# that touches the lookupScorecard or post-success cache-write path.
+#
+# Two modes:
+#
+# ./scripts/staging-cache-smoke.sh
+# Warm + edge tests only. No sandbox spawns. Safe to run repeatedly.
+# Asserts validation gates, Turnstile semantics, method gate, curated
+# registry hit unmetered, and cache READS for binaries previously
+# written (cowsay is the canonical fixture, see HOW THE CACHE GETS
+# SEEDED below).
+#
+# ./scripts/staging-cache-smoke.sh --cold
+# Adds three cold sandbox spawns. Runs cold-POST then warm-POST for
+# each of: `pip install black`, `cargo binstall ouch`, and the
+# hint-mapped github-url `https://github.com/Aider-AI/aider`.
+# Asserts cache WRITES (R2 object lands at the canonical key) AND
+# READS (second request hits the cache, sub-2s, same scorecard
+# payload). Each cold spawn burns ~5-20 s of staging container time;
+# use sparingly.
+#
+# HOW THE CACHE GETS SEEDED: U7 writes to SCORE_CACHE on every successful
+# live score, so any prior --cold run (or production-style traffic from
+# the homepage form once U8 ships) seeds the cache. The warm-mode tests
+# assume `cowsay` is already cached — the very first U7 verification on
+# 2026-05-19 wrote it. If it ages out via the 7-day R2 lifecycle, run
+# `./scripts/staging-cache-smoke.sh --cold` to reseed.
+#
+# Turnstile bypass: staging's TURNSTILE_SECRET is bound to the Cloudflare
+# always-passes test secret, so all POSTs in this script pass
+# `turnstile_token: "x"`. See
+# docs/solutions/tooling-decisions/cloudflare-staging-turnstile-test-secret-2026-05-19.md
+# for the full pattern.
+#
+# Cloudflare Access (added 2026-05-19): the staging Worker URL is now
+# gated by a CF Access Self-Hosted Application. CLI clients must send
+# CF-Access-Client-Id + CF-Access-Client-Secret headers from a service
+# token. This script reads them from 1Password by item title:
+# "Cloudflare Access Service Token - agentnative-site-staging"
+# A missing 1Password helper OR an unreadable service-token item aborts
+# the run up front with a FATAL message (exit 2): without the headers
+# every request would be 302-redirected to `*.cloudflareaccess.com`
+# and the suite would only produce confusing protocol-level failures.
+#
+# Dependencies: curl, jaq (preferred) or jq, wrangler (bun x wrangler), date (GNU or BSD), op (1Password CLI).
+
+# Only -u is set (no -e / pipefail): a failing command does not abort the
+# script; each probe's outcome is tallied through ok/ko instead.
+set -u
+
+STAGING_URL="${STAGING_URL:-https://agentnative-site-staging.brettdavies.workers.dev}"
+STAGING_BUCKET="${STAGING_BUCKET:-anc-score-cache-staging}"
+# Warm mode by default; only a literal first argument of --cold enables the
+# sandbox-spawning group.
+COLD=false
+[ "${1:-}" = "--cold" ] && COLD=true
+
+# Currently 0.4.0 — keep in lockstep with src/worker/spec-version.gen.ts.
+SPEC_VERSION="${SPEC_VERSION:-0.4.0}"
+
+# Prefer jaq (faster, drop-in jq replacement). Fall back to jq.
+JQ_BIN="$(command -v jaq || command -v jq || true)"
+if [ -z "$JQ_BIN" ]; then
+ echo "FATAL: neither jaq nor jq is installed. Install one (brew install jaq) and retry." >&2
+ exit 2
+fi
+
+# Fetch CF Access service token credentials from 1Password. The values
+# never enter the script's logged output; they live in shell variables
+# scoped to this process and are passed to curl via -H. The 1Password
+# helper script picks up the operator's default vault.
+OP_ITEM="Cloudflare Access Service Token - agentnative-site-staging"
+OP_READ="${OP_READ:-$HOME/.claude/skills/1password/scripts/read_field.sh}"
+if [ ! -x "$OP_READ" ]; then
+ echo "FATAL: 1Password helper not found at $OP_READ. Export OP_READ to point at it, or install the 1password skill." >&2
+ exit 2
+fi
+# Helper failures are swallowed here (stderr discarded, `|| true`); the
+# emptiness check right below turns them into a single FATAL message.
+CF_ACCESS_CLIENT_ID="$("$OP_READ" "$OP_ITEM" client_id 2>/dev/null || true)"
+CF_ACCESS_CLIENT_SECRET="$("$OP_READ" "$OP_ITEM" client_secret 2>/dev/null || true)"
+if [ -z "$CF_ACCESS_CLIENT_ID" ] || [ -z "$CF_ACCESS_CLIENT_SECRET" ]; then
+ echo "FATAL: could not read CF Access service token from 1Password item '$OP_ITEM'." >&2
+ echo " Verify the item exists in 1Password with fields 'client_id' and 'client_secret'." >&2
+ echo " Then re-run. Without these credentials every staging request returns 302 to *.cloudflareaccess.com." >&2
+ exit 2
+fi
+
+# Curl helper that always carries the CF Access service-token headers.
+# All HTTP calls below go through these so the Access boundary is
+# transparent to the test logic.
+ACCESS_HEADERS=(
+ -H "CF-Access-Client-Id: $CF_ACCESS_CLIENT_ID"
+ -H "CF-Access-Client-Secret: $CF_ACCESS_CLIENT_SECRET"
+)
+
+# Running tallies maintained by ok/ko and reported in the final summary.
+PASS=0
+FAIL=0
+FAIL_LABELS=()
+
+# ok LABEL — print a [pass] line for LABEL and increment PASS.
+ok() {
+ printf ' [pass] %s\n' "$1"
+ PASS=$((PASS + 1))
+}
+
+# ko LABEL DETAIL — print a [FAIL] line with DETAIL, increment FAIL, and
+# record LABEL in FAIL_LABELS so the summary can list it.
+ko() {
+ printf ' [FAIL] %s — %s\n' "$1" "$2"
+ FAIL=$((FAIL + 1))
+ FAIL_LABELS+=("$1")
+}
+
+# Millisecond clock (Linux + macOS).
+# Prints the current epoch time in milliseconds on stdout.
+now_ms() {
+ # Use `date +%s%N` (nanoseconds) only when %N is really expanded; a `date`
+ # that echoes a literal "N" is treated as lacking nanosecond support.
+ if date +%s%N >/dev/null 2>&1 && [ "$(date +%N)" != "N" ]; then
+ echo $(($(date +%s%N) / 1000000))
+ else
+ # macOS without coreutils — fall back to perl.
+ perl -MTime::HiRes=time -E 'say int(time() * 1000)'
+ fi
+}
+
+# expect_status_post LABEL BODY EXPECTED_STATUS [QUERY_STRING]
+# POSTs BODY as JSON to /api/score (with the optional QUERY_STRING appended
+# verbatim) and records a pass when the HTTP status equals EXPECTED_STATUS.
+# On mismatch the first 200 bytes of the response body go into the FAIL line.
+expect_status_post() {
+ local label=$1 body=$2 expected=$3 query=${4:-}
+ local tmp
+ tmp=$(mktemp)
+ local code
+ # -o captures the body in the temp file; -w makes the status code the only
+ # thing written to stdout.
+ code=$(curl -s -o "$tmp" -w '%{http_code}' "${ACCESS_HEADERS[@]}" \
+ -X POST -H 'content-type: application/json' \
+ "$STAGING_URL/api/score$query" \
+ --data "$body")
+ if [ "$code" = "$expected" ]; then
+ ok "$label (status=$code)"
+ else
+ ko "$label" "expected $expected, got $code: $(head -c 200 "$tmp")"
+ fi
+ rm -f "$tmp"
+}
+
+# expect_error_code LABEL BODY EXPECTED_HTTP_STATUS EXPECTED_ERROR_CODE [QUERY]
+# POSTs BODY as JSON to /api/score and records a pass only when BOTH the HTTP
+# status and the response's .error.code match the expected values.
+expect_error_code() {
+ local label=$1 body=$2 expected_status=$3 expected_code=$4 query=${5:-}
+ local tmp
+ tmp=$(mktemp)
+ local code
+ code=$(curl -s -o "$tmp" -w '%{http_code}' "${ACCESS_HEADERS[@]}" \
+ -X POST -H 'content-type: application/json' \
+ "$STAGING_URL/api/score$query" \
+ --data "$body")
+ local body_code
+ # A missing .error.code or an unparseable body both collapse to the empty
+ # string, which then fails the comparison below.
+ body_code=$("$JQ_BIN" -r '.error.code // ""' <"$tmp" 2>/dev/null || echo "")
+ if [ "$code" = "$expected_status" ] && [ "$body_code" = "$expected_code" ]; then
+ ok "$label (status=$code, error.code=$body_code)"
+ else
+ ko "$label" "expected ${expected_status}/${expected_code}, got ${code}/${body_code}"
+ fi
+ rm -f "$tmp"
+}
+
+# expect_status_method LABEL METHOD EXPECTED_STATUS
+# Sends a body-less request with the given HTTP METHOD to /api/score and
+# records a pass when the status equals EXPECTED_STATUS. The response body
+# is discarded.
+expect_status_method() {
+ local label=$1 method=$2 expected=$3
+ local code
+ code=$(curl -s -o /dev/null -w '%{http_code}' "${ACCESS_HEADERS[@]}" -X "$method" "$STAGING_URL/api/score")
+ if [ "$code" = "$expected" ]; then
+ ok "$label (method=$method, status=$code)"
+ else
+ ko "$label" "expected $expected, got $code"
+ fi
+}
+
+# expect_warm_hit LABEL BODY MAX_MS — POST BODY and assert a 200 whose
+# round-trip takes at most MAX_MS (treated as evidence of a cache hit).
+# NOTE(review): this header previously also promised a
+# scorecard.kind != 'registry_hit' check; the body below never inspects the
+# response payload, so a curated registry hit would pass as well.
+expect_warm_hit() {
+ local label=$1 body=$2 max_ms=$3
+ local tmp
+ tmp=$(mktemp)
+ local start_ms end_ms duration code
+ # Wall-clock timing around the whole curl invocation.
+ start_ms=$(now_ms)
+ code=$(curl -s -o "$tmp" -w '%{http_code}' "${ACCESS_HEADERS[@]}" \
+ -X POST -H 'content-type: application/json' \
+ "$STAGING_URL/api/score" --data "$body")
+ end_ms=$(now_ms)
+ duration=$((end_ms - start_ms))
+ if [ "$code" != "200" ]; then
+ ko "$label" "expected 200, got $code: $(head -c 200 "$tmp")"
+ rm -f "$tmp"
+ return
+ fi
+ if [ "$duration" -gt "$max_ms" ]; then
+ ko "$label" "expected <${max_ms} ms (cache hit), got ${duration} ms — cache may be cold"
+ rm -f "$tmp"
+ return
+ fi
+ ok "$label (status=200, duration=${duration} ms < ${max_ms} ms — cache hit)"
+ rm -f "$tmp"
+}
+
+# expect_cold_then_warm LABEL_PREFIX BODY EXPECTED_BINARY
+expect_cold_then_warm() {
+ local label_prefix=$1 body=$2 binary=$3
+ local tmp_cold tmp_warm
+ tmp_cold=$(mktemp)
+ tmp_warm=$(mktemp)
+
+ # COLD
+ local start_ms end_ms duration code
+ start_ms=$(now_ms)
+ code=$(curl -s -o "$tmp_cold" -w '%{http_code}' --max-time 90 "${ACCESS_HEADERS[@]}" \
+ -X POST -H 'content-type: application/json' \
+ "$STAGING_URL/api/score" --data "$body")
+ end_ms=$(now_ms)
+ duration=$((end_ms - start_ms))
+ if [ "$code" != "200" ]; then
+ ko "$label_prefix cold" "expected 200, got $code: $(head -c 200 "$tmp_cold")"
+ rm -f "$tmp_cold" "$tmp_warm"
+ return
+ fi
+ ok "$label_prefix cold (status=200, duration=${duration} ms — sandbox spawn)"
+
+ # Verify R2 object lands at the canonical key.
+ local key="scores/${binary}/${SPEC_VERSION}.json"
+ if bun x wrangler r2 object get "${STAGING_BUCKET}/${key}" --file /tmp/r2-probe.json --remote >/dev/null 2>&1; then
+ local payload_keys
+ payload_keys=$("$JQ_BIN" -r 'keys | join(",")' /dev/null || echo "")
+ if echo "$payload_keys" | grep -q "spec_version" && echo "$payload_keys" | grep -q "anc_version" && echo "$payload_keys" | grep -q "tool_version"; then
+ ok "$label_prefix R2 wrote $key with full payload shape"
+ else
+ ko "$label_prefix R2 write" "payload shape missing required fields (got: $payload_keys)"
+ fi
+ else
+ ko "$label_prefix R2 write" "object not found at $key after cold run"
+ fi
+
+ # WARM
+ start_ms=$(now_ms)
+ code=$(curl -s -o "$tmp_warm" -w '%{http_code}' "${ACCESS_HEADERS[@]}" \
+ -X POST -H 'content-type: application/json' \
+ "$STAGING_URL/api/score" --data "$body")
+ end_ms=$(now_ms)
+ duration=$((end_ms - start_ms))
+ if [ "$code" != "200" ]; then
+ ko "$label_prefix warm" "expected 200, got $code"
+ rm -f "$tmp_cold" "$tmp_warm"
+ return
+ fi
+ if [ "$duration" -gt 2000 ]; then
+ ko "$label_prefix warm" "expected <2000 ms (cache hit), got ${duration} ms"
+ rm -f "$tmp_cold" "$tmp_warm"
+ return
+ fi
+ ok "$label_prefix warm (status=200, duration=${duration} ms — cache hit)"
+
+ # Cold and warm scorecards must be byte-identical (cache returns what we wrote).
+ if diff <("$JQ_BIN" -S '.scorecard' <"$tmp_cold") <("$JQ_BIN" -S '.scorecard' <"$tmp_warm") >/dev/null 2>&1; then
+ ok "$label_prefix scorecard equality (cold == warm)"
+ else
+ ko "$label_prefix scorecard equality" "cold and warm scorecards differ"
+ fi
+ rm -f "$tmp_cold" "$tmp_warm"
+}
+
+printf '\n=== staging-cache-smoke @ %s ===\n' "$STAGING_URL"
+printf ' SPEC_VERSION=%s COLD=%s\n\n' "$SPEC_VERSION" "$COLD"
+
+# -----------------------------------------------------------------------------
+# Group Z — CF Access boundary (must run FIRST so a lifted Access app
+# surfaces here rather than silently letting the rest of the suite
+# "pass" via the service-token bypass)
+# -----------------------------------------------------------------------------
+#
+# Without the ACCESS_HEADERS, an unauth request to the staging Worker
+# must be intercepted by Cloudflare Access and redirected to the
+# account's *.cloudflareaccess.com login flow. If we instead see a 200
+# or a 4xx from the Worker, the Access app has been disabled or its
+# policies wiped, AND the rest of the suite would falsely "pass"
+# (because every other request carries the service-token headers).
+# This probe catches the boundary getting silently lifted.
+printf '[Z] CF Access boundary\n'
+# A single request supplies both the status and the redirect target, so the
+# two values always describe the same response. --max-time keeps an
+# unreachable host from hanging the whole suite on its first probe.
+ZUNAUTH_PROBE=$(curl -s -o /dev/null --max-time 30 -w '%{http_code} %{redirect_url}' \
+  "$STAGING_URL/api/score?input=ripgrep")
+# Output shape is "STATUS URL"; URL is empty when the response is no redirect.
+ZUNAUTH_STATUS=${ZUNAUTH_PROBE%% *}
+ZUNAUTH_LOC=${ZUNAUTH_PROBE#* }
+# If curl printed nothing at all there is no separator to split on.
+[ "$ZUNAUTH_LOC" = "$ZUNAUTH_PROBE" ] && ZUNAUTH_LOC=""
+if [ "$ZUNAUTH_STATUS" = "302" ] && printf '%s\n' "$ZUNAUTH_LOC" | grep -qF 'cloudflareaccess.com'; then
+  ok "Z01 unauth request → 302 to *.cloudflareaccess.com (boundary enforced)"
+else
+  ko "Z01 unauth boundary" "expected 302 to *.cloudflareaccess.com; got status=$ZUNAUTH_STATUS location=${ZUNAUTH_LOC:-(none)}"
+fi
+
+# -----------------------------------------------------------------------------
+# Group A — input validation (warm; no sandbox)
+# -----------------------------------------------------------------------------
+# Every case expects HTTP 400; all but A02 also pin the .error.code value.
+printf '\n[A] input validation\n'
+expect_error_code "A01 empty input" '{"input":"","turnstile_token":"x"}' 400 unrecognized_input
+expect_status_post "A02 malformed JSON body" 'not json' 400
+expect_error_code "A03 non-https URL" '{"input":"http://github.com/foo/bar","turnstile_token":"x"}' 400 non_https_url
+expect_error_code "A04 non-github host" '{"input":"https://example.com/foo/bar","turnstile_token":"x"}' 400 non_github_host
+expect_error_code "A05 branch path URL" '{"input":"https://github.com/foo/bar/tree/main","turnstile_token":"x"}' 400 invalid_url_path
+
+# -----------------------------------------------------------------------------
+# Group B — method gate (warm; no sandbox)
+# -----------------------------------------------------------------------------
+printf '\n[B] method gate\n'
+expect_status_method "B01 DELETE → 405" DELETE 405
+expect_status_method "B02 PUT → 405" PUT 405
+
+# -----------------------------------------------------------------------------
+# Group C — Turnstile semantics (warm; no sandbox)
+# -----------------------------------------------------------------------------
+# Empty/missing tokens are rejected by the Worker BEFORE siteverify is called
+# (the "missing_token" check fires first). The CF test secret only matters
+# AFTER a non-empty token reaches siteverify.
+# NOTE(review): the assertions below expect error.code "turnstile_failed",
+# not "missing_token" — presumably the latter is an internal reason rather
+# than the public error code; confirm against the Worker handler.
+printf '\n[C] Turnstile semantics\n'
+expect_error_code "C01 empty turnstile_token" '{"input":"https://github.com/foo/bar","turnstile_token":""}' 400 turnstile_failed
+expect_error_code "C02 missing turnstile_token" '{"input":"https://github.com/foo/bar"}' 400 turnstile_failed
+
+# Curated registry hit (slug=ripgrep) is unmetered — bypasses Turnstile entirely.
+# Should return 200 with ANY token, including empty or missing.
+expect_status_post "C03 curated slug with token=x" '{"input":"ripgrep","turnstile_token":"x"}' 200
+expect_status_post "C04 curated slug with empty token (unmetered bypass)" '{"input":"ripgrep","turnstile_token":""}' 200
+expect_status_post "C05 curated slug without token field" '{"input":"ripgrep"}' 200
+
+# -----------------------------------------------------------------------------
+# Group D — registry/cache read tier (warm; no sandbox)
+# -----------------------------------------------------------------------------
+printf '\n[D] read tiers\n'
+expect_warm_hit "D01 POST cowsay (cached from prior run)" '{"input":"npm install -g cowsay","turnstile_token":"x"}' 2000
+
+# GET path: cache tier also honored on GET per U7 (read-only contract extended).
+# Status and latency come from the SAME request so the pass/fail verdict
+# describes one response rather than two independent ones.
+D02_START_MS=$(now_ms)
+GET_STATUS=$(curl -s -o /dev/null -w '%{http_code}' "${ACCESS_HEADERS[@]}" "$STAGING_URL/api/score?input=npm%20install%20-g%20cowsay")
+GET_LATENCY=$(($(now_ms) - D02_START_MS))
+if [ "$GET_STATUS" = "200" ] && [ "$GET_LATENCY" -lt 2000 ]; then
+  ok "D02 GET cowsay → 200 cache-hit ($GET_LATENCY ms)"
+else
+  ko "D02 GET cowsay" "status=$GET_STATUS, latency=$GET_LATENCY ms"
+fi
+
+# GET on an uncached non-registry github-url → 404 chain_no_resolve.
+# GET is registry + cache tier only (read-only contract). The cache tier
+# can't help here because there's no derivable binary upfront.
+D03_BODY=$(mktemp)
+GET_404_STATUS=$(curl -s -o "$D03_BODY" -w '%{http_code}' "${ACCESS_HEADERS[@]}" "$STAGING_URL/api/score?input=https%3A%2F%2Fgithub.com%2Ftotally%2Funknown-tool-12345")
+# Parse the body that was just captured; an unparseable or error-less body
+# yields the empty string and fails the comparison below.
+GET_404_CODE=$("$JQ_BIN" -r '.error.code // ""' <"$D03_BODY" 2>/dev/null || echo "")
+if [ "$GET_404_STATUS" = "404" ] && [ "$GET_404_CODE" = "chain_no_resolve" ]; then
+  ok "D03 GET unknown github → 404 chain_no_resolve"
+else
+  ko "D03 GET unknown github" "status=$GET_404_STATUS, error.code=$GET_404_CODE"
+fi
+rm -f "$D03_BODY"
+
+# -----------------------------------------------------------------------------
+# Group E — cold sandbox spawns (only with --cold; 3 sandbox runs)
+# -----------------------------------------------------------------------------
+if [ "$COLD" = true ]; then
+  printf '\n[E] cold sandbox spawns (3 cold + 3 warm)\n'
+
+  expect_cold_then_warm "E01 pip install black" '{"input":"pip install black","turnstile_token":"x"}' black
+  expect_cold_then_warm "E02 cargo binstall ouch" '{"input":"cargo binstall ouch","turnstile_token":"x"}' ouch
+  expect_cold_then_warm "E03 github.com/Aider-AI/aider (hint→pip aider-chat)" '{"input":"https://github.com/Aider-AI/aider","turnstile_token":"x"}' aider
+
+  # E04 — ?fromCache=false bypass on a cached entry. Live re-spawn forced
+  # even though cowsay is cached. The cache write still fires (overwriting
+  # the existing entry with a freshly-scored copy).
+  printf ' exercising ?fromCache=false bypass on cowsay (1 sandbox spawn)\n'
+  # Capture the body in a private temp file (not a fixed /tmp path) so a
+  # failure can show what the Worker answered, like the expect_* helpers do.
+  E04_BODY=$(mktemp)
+  start_ms=$(now_ms)
+  code=$(curl -s -o "$E04_BODY" -w '%{http_code}' --max-time 90 "${ACCESS_HEADERS[@]}" \
+    -X POST -H 'content-type: application/json' \
+    "$STAGING_URL/api/score?fromCache=false" \
+    --data '{"input":"npm install -g cowsay","turnstile_token":"x"}')
+  end_ms=$(now_ms)
+  duration=$((end_ms - start_ms))
+  # A response faster than 1500 ms is taken to mean the cache answered and
+  # the bypass was NOT honoured.
+  if [ "$code" = "200" ] && [ "$duration" -gt 1500 ]; then
+    ok "E04 ?fromCache=false on cowsay (status=200, duration=${duration} ms — live re-spawn)"
+  else
+    ko "E04 ?fromCache=false" "status=$code, duration=${duration} ms (expected 200 + >1500 ms): $(head -c 200 "$E04_BODY")"
+  fi
+  rm -f "$E04_BODY"
+else
+  printf '\n[E] cold sandbox spawns: SKIPPED (pass --cold to enable)\n'
+fi
+
+# -----------------------------------------------------------------------------
+# Summary
+# -----------------------------------------------------------------------------
+# Exit status: 1 when any probe failed (after listing the failed labels),
+# otherwise 0.
+printf '\n=== summary: %d passed, %d failed ===\n' "$PASS" "$FAIL"
+if [ "$FAIL" -gt 0 ]; then
+ printf 'failed tests:\n'
+ for label in "${FAIL_LABELS[@]}"; do printf ' - %s\n' "$label"; done
+ exit 1
+fi
+exit 0
diff --git a/src/build/00-spec-version-gen.mjs b/src/build/00-spec-version-gen.mjs
new file mode 100644
index 0000000..3ead95f
--- /dev/null
+++ b/src/build/00-spec-version-gen.mjs
@@ -0,0 +1,103 @@
+// Build-time emitter for `src/worker/spec-version.gen.ts`.
+//
+// Reads `src/data/spec/VERSION` (the vendored spec channel — the standard
+// the Worker scores against) and `content/principles/VERSION` (this site's
+// principle copy — may lag the spec briefly during a release cycle), and
+// writes a TS module the Worker imports at build time. The emitted file is
+// the single source of truth for `SPEC_VERSION`, `SITE_SPEC_VERSION`, and
+// `CHECKER_URL`; `response-shape.ts` consumes it for every response triad.
+//
+// Two-file split is load-bearing: spec VERSION and site-principles VERSION
+// are released independently. A spec bump can ship before the site copy
+// updates; rendering both makes the lag observable on every response.
+//
+// Run via `build()` in build.mjs before any worker-bundle step. The drift
+// test in tests/spec-version-gen.test.ts re-runs this emitter in memory
+// and asserts the committed `.gen.ts` matches — so an out-of-date generated
+// file fails CI instead of silently shipping a stale triad.
+
+import { readFile, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+// import.meta.url resolves to this file (src/build/00-spec-version-gen.mjs);
+// the three '..' segments step over the file name, `build`, and `src`.
+const REPO_ROOT = join(fileURLToPath(import.meta.url), '..', '..', '..');
+// Inputs: the two VERSION files. Output: the generated TS module.
+const SPEC_VERSION_PATH = join(REPO_ROOT, 'src', 'data', 'spec', 'VERSION');
+const SITE_VERSION_PATH = join(REPO_ROOT, 'content', 'principles', 'VERSION');
+const GEN_PATH = join(REPO_ROOT, 'src', 'worker', 'spec-version.gen.ts');
+
+// CHECKER_URL is intentionally not a file: anc.dev is the only live-scoring
+// surface. If a future fork wants to point at a different host, override
+// here. Not a token-fetched value because we want the literal embedded in
+// the bundle, not a runtime lookup.
+const CHECKER_URL = 'https://anc.dev/score';
+
+/**
+ * Read a VERSION file and trim surrounding whitespace (leading and
+ * trailing, including the final newline). Throws if the file is missing
+ * or empty — these constants must NEVER ship as empty strings, since
+ * `response-shape.ts` writes them into every `/api/score` response.
+ *
+ * NOTE(review): the semver check only anchors the START of the value, so
+ * anything after `MAJOR.MINOR.PATCH` (pre-release tags, but also stray
+ * text or extra lines) is accepted and returned as-is.
+ *
+ * @param {string} path Path of the VERSION file to read.
+ * @returns {Promise<string>} The trimmed file content.
+ * @throws {Error} When the trimmed content is empty or does not start with
+ *   `digits.digits.digits`; read errors from `readFile` propagate unchanged.
+ */
+async function readVersion(path) {
+ const raw = await readFile(path, 'utf8');
+ const trimmed = raw.trim();
+ if (!trimmed) throw new Error(`spec-version-gen: ${path} is empty`);
+ if (!/^\d+\.\d+\.\d+/.test(trimmed)) {
+ throw new Error(`spec-version-gen: ${path} does not look like semver (got "${trimmed}")`);
+ }
+ return trimmed;
+}
+
+/**
+ * Build the file content. Pure — takes the resolved versions and returns
+ * the bytes that should land at `src/worker/spec-version.gen.ts`. Exposed
+ * so the drift test can compare the on-disk file against a fresh
+ * re-computation without writing anything.
+ *
+ * The three values are interpolated verbatim between single quotes — no
+ * escaping is applied here, so callers must pass values that are safe
+ * inside a single-quoted TS string literal.
+ *
+ * @param {object} args
+ * @param {string} args.specVersion Emitted as `SPEC_VERSION`.
+ * @param {string} args.siteSpecVersion Emitted as `SITE_SPEC_VERSION`.
+ * @param {string} args.checkerUrl Emitted as `CHECKER_URL`.
+ * @returns {string} Full source text of the generated module.
+ */
+export function renderSpecVersionModule({ specVersion, siteSpecVersion, checkerUrl }) {
+ return `// GENERATED by src/build/00-spec-version-gen.mjs — do NOT edit.
+// Re-run \`bun run build\` to regenerate. The drift check in
+// tests/spec-version-gen.test.ts fails CI if this file is out of date.
+//
+// SPEC_VERSION — from src/data/spec/VERSION (the standard the
+// Worker scores against).
+// SITE_SPEC_VERSION — from content/principles/VERSION (the principle
+// copy this site renders).
+// CHECKER_URL — production live-scoring surface; moves with anc.dev.
+
+export const SPEC_VERSION = '${specVersion}';
+export const SITE_SPEC_VERSION = '${siteSpecVersion}';
+export const CHECKER_URL = '${checkerUrl}';
+`;
+}
+
+/**
+ * Run the emitter. Returns the generated content + the resolved versions
+ * so callers (build.mjs, the drift test) can assert on either.
+ *
+ * Side effect: overwrites the file at `GEN_PATH` with the rendered module.
+ *
+ * @returns {Promise<{specVersion: string, siteSpecVersion: string, checkerUrl: string, content: string, path: string}>}
+ *   Resolved inputs, the rendered module text, and the path written to.
+ * @throws {Error} Whatever `readVersion` or `writeFile` throws.
+ */
+export async function generateSpecVersionModule() {
+ const specVersion = await readVersion(SPEC_VERSION_PATH);
+ const siteSpecVersion = await readVersion(SITE_VERSION_PATH);
+ const content = renderSpecVersionModule({ specVersion, siteSpecVersion, checkerUrl: CHECKER_URL });
+ await writeFile(GEN_PATH, content);
+ return { specVersion, siteSpecVersion, checkerUrl: CHECKER_URL, content, path: GEN_PATH };
+}
+
+/**
+ * Pure variant for the drift test — computes what the file SHOULD say
+ * without writing it. The test reads the on-disk file and compares.
+ *
+ * "Pure" here means no writes: the two VERSION files are still read from
+ * disk on every call.
+ *
+ * @returns {Promise<{specVersion: string, siteSpecVersion: string, checkerUrl: string, content: string, path: string}>}
+ *   Same shape as `generateSpecVersionModule()`; `path` is where the
+ *   content would be written.
+ */
+export async function computeExpectedSpecVersionModule() {
+ const specVersion = await readVersion(SPEC_VERSION_PATH);
+ const siteSpecVersion = await readVersion(SITE_VERSION_PATH);
+ return {
+ specVersion,
+ siteSpecVersion,
+ checkerUrl: CHECKER_URL,
+ content: renderSpecVersionModule({ specVersion, siteSpecVersion, checkerUrl: CHECKER_URL }),
+ path: GEN_PATH,
+ };
+}
+
+// CLI entry: when this module is run directly (import.meta.main is truthy),
+// regenerate the file and log what was written. Importing the module from
+// the build or a test skips this branch.
+if (import.meta.main) {
+ const { specVersion, siteSpecVersion, path } = await generateSpecVersionModule();
+ console.log(`spec-version-gen: wrote ${path} (spec=${specVersion}, site=${siteSpecVersion})`);
+}
diff --git a/src/build/assets.mjs b/src/build/01-assets.mjs
similarity index 91%
rename from src/build/assets.mjs
rename to src/build/01-assets.mjs
index 56b8d0f..e6ba625 100644
--- a/src/build/assets.mjs
+++ b/src/build/01-assets.mjs
@@ -84,8 +84,12 @@ export async function copyAssets({ repoRoot, distDir }) {
join(repoRoot, 'src/client/leaderboard.ts'),
join(distDir, 'js/leaderboard.js'),
);
+ // Homepage live-scoring form (Turnstile lazy-load + 2 s theater +
+ // redirect to /live-score/). Loaded with defer from the
+ // homepage shell only.
+ const liveScoreJs = await bundleClient(join(repoRoot, 'src/client/live-score.ts'), join(distDir, 'js/live-score.js'));
// theme-init is inlined into every HTML head — no file emitted.
const themeInit = await bundleClient(join(repoRoot, 'src/client/theme-init.ts'));
- return { themeInit, themeJs, clipboardJs, leaderboardJs };
+ return { themeInit, themeJs, clipboardJs, leaderboardJs, liveScoreJs };
}
diff --git a/src/build/06-homepage.mjs b/src/build/06-homepage.mjs
new file mode 100644
index 0000000..880a4e6
--- /dev/null
+++ b/src/build/06-homepage.mjs
@@ -0,0 +1,160 @@
+// Homepage emit. Section 6 of the build pipeline.
+//
+// Produces dist/index.html (hero + live-score form + principle listing) and
+// the trimmed-to-match dist/index.md twin. The live-scoring form is
+// server-rendered as an inert shell; /js/live-score.js wires submit +
+// Turnstile + redirect on the client side. The Turnstile sitekey is
+// injected by the Worker via meta[name=turnstile-sitekey] — only set on
+// staging until full promotion (DESIGN.md §3.4).
+
+import { readFile, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { extractDescription, extractFirstParagraph, extractIntroSummary, extractTitle } from './content.mjs';
+import { emitShell } from './shell.mjs';
+import { absolutifyMarkdownLinks, escHtml } from './util.mjs';
+
+/**
+ * Build the homepage body HTML — hero, live-scoring form section,
+ * principle listing, install-anc CTA. The live-score section sits between
+ * hero and principles per the wireframe-first placement; layout polish is
+ * deferred to /design-review after the basic surface renders.
+ *
+ * @param {string} introTitle
+ * @param {string} introLede
+ * @param {Array<{n: number, title: string, shortDesc: string}>} principles
+ * @returns {string}
+ */
+function buildHomepageBody(introTitle, introLede, principles) {
+ const entries = principles
+ .map((p) => {
+ // Two-digit, zero-padded principle number (1 -> "01").
+ const num = String(p.n).padStart(2, '0');
+ // Drop a leading "P<digits>:" prefix from the title, then HTML-escape.
+ const title = escHtml(p.title.replace(/^P\d+:\s*/, ''));
+ const desc = escHtml(p.shortDesc);
+ return `
+
+${buildLiveScoreSection()}
+
+
+${entries}
+
+`;
+}
+
+/**
+ * Live-scoring paste-input form section. Server-rendered shell: the JS at
+ * /js/live-score.js (lazy-loaded with the rest of the deferred client
+ * bundle) wires submit + Turnstile + theater. The Turnstile sitekey is
+ * injected by the Worker at request time via meta[name=turnstile-sitekey]
+ * — only set on staging until full promotion, so production HTML carries
+ * an empty value and the JS disables the form with a "not yet live"
+ * message.
+ *
+ * R9 CTA framing: install-anc is the PRIMARY surface, not buried. Visible
+ * above the form input so a visitor who never engages the form still sees
+ * the local-install option first.
+ *
+ * Takes no parameters. Called from buildHomepageBody, which splices the
+ * returned markup into the page body.
+ *
+ * @returns {string}
+ */
+function buildLiveScoreSection() {
+ return `
+
+ Try
+
+
Score a binary, live.
+
+ Install anc locally for source + project depth. The demo here is binary and behavioral checks only.
+
+
+
+
+`;
+}
+
+/**
+ * Emit dist/index.html and dist/index.md. The introSource is returned so
+ * downstream (llms-full.txt) can embed the homepage markdown verbatim
+ * without re-reading the file.
+ *
+ * Reads `_intro.md` from contentDir; title, summary, description and lede
+ * are all extracted from that one source.
+ *
+ * @param {object} args
+ * @param {string} args.distDir
+ * @param {string} args.contentDir
+ * @param {string} args.themeInit
+ * @param {Array<{n: number, title: string, shortDesc: string}>} args.principles
+ * @returns {Promise<{introTitle: string, introSummary: string, introSource: string, introLede: string}>}
+ */
+export async function emitHomepage({ distDir, contentDir, themeInit, principles }) {
+ const introPath = join(contentDir, '_intro.md');
+ const introSource = await readFile(introPath, 'utf8');
+ const introTitle = extractTitle(introSource);
+ const introSummary = extractIntroSummary(introSource);
+ const introDescription = extractDescription(introSource);
+ const introLede = extractFirstParagraph(introSource);
+
+ const indexBody = buildHomepageBody(introTitle, introLede, principles);
+ await writeFile(
+ join(distDir, 'index.html'),
+ emitShell({
+ title: introTitle,
+ description: introDescription,
+ canonicalPath: '/',
+ bodyHtml: indexBody,
+ themeInitJs: themeInit,
+ isIndex: true,
+ // Homepage carries the live-scoring form. /js/live-score.js is
+ // bundled in 01-assets.mjs alongside theme/clipboard/leaderboard and
+ // loads with `defer`. Lazy-loads Turnstile + handles submit/redirect.
+ extraScripts: ['/js/live-score.js'],
+ }),
+ );
+
+ // index.md — trimmed to match the HTML homepage.
+ // Shape: H1 title, lede paragraph, then a "Principles" bullet list linking
+ // each principle to /p<n>; links are absolutified before writing.
+ const indexMdLines = [
+ `# ${introTitle}`,
+ '',
+ introLede,
+ '',
+ '## Principles',
+ '',
+ ...principles.map((p) => `- [${p.title}](/p${p.n}) — ${p.shortDesc}`),
+ '',
+ ];
+ await writeFile(join(distDir, 'index.md'), absolutifyMarkdownLinks(indexMdLines.join('\n')));
+
+ return { introTitle, introSummary, introSource, introLede };
+}
diff --git a/src/build/07-subpages.mjs b/src/build/07-subpages.mjs
new file mode 100644
index 0000000..b05b10e
--- /dev/null
+++ b/src/build/07-subpages.mjs
@@ -0,0 +1,60 @@
+// Content-driven sub-pages emit. Section 7 of the build pipeline.
+//
+// For each entry in `subPages`, reads content/<name>.md, renders the HTML
+// via the shared markdown pipeline, wraps in emitShell, and emits both the
+// HTML and markdown twin. The twin is the authored source with site-
+// relative links absolutified.
+//
+// Adding a new content/*.md page requires three coordinated registrations:
+// this list, src/build/10-sitemap.mjs's hardcoded paths, and src/build/shell.mjs's
+// nav. See docs/solutions/conventions/new-content-page-requires-three-registrations-2026-05-21.md.
+
+import { readFile, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { extractDescription, extractTitle } from './content.mjs';
+import { renderMarkdown } from './render.mjs';
+import { emitShell } from './shell.mjs';
+import { absolutifyMarkdownLinks } from './util.mjs';
+
+/**
+ * Emit content-driven sub-pages (HTML + MD twin via shared pipeline).
+ *
+ * @param {object} args
+ * @param {string} args.distDir
+ * @param {string} args.contentDir
+ * @param {string} args.themeInit
+ * @returns {Promise<Array<{name: string, source: string, title: string}>>}
+ * Per-page metadata consumed by llms-full.txt assembly.
+ */
+export async function emitSubPages({ distDir, contentDir, themeInit }) {
+ const subPages = [
+ { name: 'check', path: join(contentDir, 'check.md') },
+ { name: 'install', path: join(contentDir, 'install.md') },
+ { name: 'about', path: join(contentDir, 'about.md') },
+ { name: 'badge', path: join(contentDir, 'badge.md') },
+ { name: 'changelog', path: join(contentDir, 'changelog.md') },
+ { name: 'contribute', path: join(contentDir, 'contribute.md') },
+ { name: 'methodology', path: join(contentDir, 'methodology.md') },
+ { name: 'scorecard-schema', path: join(contentDir, 'scorecard-schema.md') },
+ ];
+ const subPageData = [];
+ for (const { name, path } of subPages) {
+ const source = await readFile(path, 'utf8');
+ const title = extractTitle(source);
+ const description = extractDescription(source);
+ const html = await renderMarkdown(source);
+ await writeFile(
+ join(distDir, `${name}.html`),
+ emitShell({
+ title,
+ description,
+ canonicalPath: `/${name}`,
+ bodyHtml: html,
+ themeInitJs: themeInit,
+ }),
+ );
+ await writeFile(join(distDir, `${name}.md`), absolutifyMarkdownLinks(source));
+ subPageData.push({ name, source, title });
+ }
+ return subPageData;
+}
diff --git a/src/build/08-scorecards-emit.mjs b/src/build/08-scorecards-emit.mjs
new file mode 100644
index 0000000..02653fd
--- /dev/null
+++ b/src/build/08-scorecards-emit.mjs
@@ -0,0 +1,292 @@
+// Scorecard-surface emit. Section 8 of the build pipeline.
+//
+// Owns the entire scorecard + coverage + skill emit pipeline:
+// - Registry loading + corpus invariants
+// - Build-time indexes for the live-scoring path (registry-index.json,
+// discovery-hints-index.json)
+// - Leaderboard page (dist/scorecards.html + .md)
+// - Per-tool scorecard pages (dist/score/<name>.{html,md})
+// - Badge SVGs (dist/badge/<name>.svg)
+// - Binary-name redirect pages for tools where binary !== name
+// - Stale-file reaping for removed registry entries
+// - Coverage matrix page (dist/coverage.{html,md})
+// - Skill manifest surfaces (dist/skill.json + dist/skill.{html,md})
+//
+// Returns the data downstream needs: leaderboard (for llms-full + sitemap
+// extra paths), scorecardPaths (for sitemap), coverageMarkdown and skill
+// artifacts (for llms-full).
+
+import { mkdir, readdir, unlink, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { renderBadgeSvg } from './badge.mjs';
+import { buildCoverageBody, buildCoverageMarkdown, loadCoverageMatrix } from './coverage.mjs';
+import { emitBuildIndexes } from './registry-index.mjs';
+import {
+ computeLeaderboard,
+ extractTopIssues,
+ loadRegistry,
+ loadScoredTools,
+ runScorecardInvariants,
+} from './scorecards.mjs';
+import {
+ buildLeaderboardBody,
+ buildLeaderboardMarkdown,
+ buildScorecardBody,
+ buildScorecardMarkdown,
+} from './scorecards-render.mjs';
+import { emitShell } from './shell.mjs';
+import { emitSkillJson, emitSkillMarkdown, loadSkillData, renderSkillPage } from './skill.mjs';
+import { absolutifyMarkdownLinks, escHtml } from './util.mjs';
+
+/**
+ * Emit the leaderboard, per-tool scorecards + badges, coverage page, and
+ * skill manifest surfaces. Returns the data downstream (sitemap, llms)
+ * needs.
+ *
+ * @param {object} args
+ * @param {string} args.distDir
+ * @param {string} args.registryPath
+ * @param {string} args.hintsPath
+ * @param {string} args.coverageMatrixPath
+ * @param {string} args.skillDataPath
+ * @param {string} args.scorecardsDir
+ * @param {string} args.themeInit
+ * @returns {Promise<{
+ * leaderboard: Array