From 47cde2ca31571355a8be5bdd4c7b2fd1b6a1df81 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 3 Jul 2026 02:21:35 +0000 Subject: [PATCH 1/2] feat(wave5): per-language testing standard v2.0.0 + guide template + AffineScript MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The estate's only per-language testing depth was a single Julia guide from 2024 (no MUST/SHOULD, Rust+Julia only) plus a byte-identical duplicate snapshot. The user flagged this: replicate the pattern properly instead of one stale generic. - standards/language-testing-standards.md -> v2.0.0: RFC-2119 conformance requirements R1–R9 (unit/format/lint/coverage/property/bench/audit/contract/ proof) mapped to the CRG test taxonomy, an anti-theatre rule (no continue-on-error on a MUST check; coverage reported-not-asserted), and the required per-language guide set. Rust/SPARK + Julia mapped to R1–R9. - standards/templates/language-testing-guide-TEMPLATE.md: the skeleton every per-language guide follows — a requirement-mapping table (tool or visible `none`), tools, CI pipeline, best practices, and a mandatory honest "Known gaps" section. - standards/affinescript-testing-guide.md: the estate's primary language, which had ZERO testing standard. Authored honestly — most SHOULD rows are tracked gaps (no formatter/coverage/fuzz/bench yet) and R3 notes that affinescript-verify.yml is currently advisory. Canonical SSOT migrates to hyperpolymath/affinescript prospectively (Wave-6 charter). - scripts/check-language-guide.sh + Justfile language-guides-check, wired into `just validate` as a hard gate: a guide missing a required section (e.g. "Known gaps"), the SPDX header, or the R1..R9 mapping fails loudly. - Deleted the byte-identical language-testing-standards-v1.0.0-2024-04-14.md. - scripts/tests/wave5-language-guides-test.sh (7/7): lint accepts a conformant guide and rejects incomplete ones; the standard is v2.0.0/RFC-2119. 
Zig / Elixir+Gleam / Idris2+Agda guides are Wave-6 charters. Licence rows manual-only (flag-only). Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_0114ps6mY5jAH4Sz --- Justfile | 6 + scripts/check-language-guide.sh | 55 ++++ scripts/tests/wave5-language-guides-test.sh | 41 +++ standards/affinescript-testing-guide.md | 109 ++++++++ ...age-testing-standards-v1.0.0-2024-04-14.md | 165 ----------- standards/language-testing-standards.md | 257 ++++++++---------- .../language-testing-guide-TEMPLATE.md | 66 +++++ 7 files changed, 384 insertions(+), 315 deletions(-) create mode 100755 scripts/check-language-guide.sh create mode 100755 scripts/tests/wave5-language-guides-test.sh create mode 100644 standards/affinescript-testing-guide.md delete mode 100644 standards/language-testing-standards-v1.0.0-2024-04-14.md create mode 100644 standards/templates/language-testing-guide-TEMPLATE.md diff --git a/Justfile b/Justfile index 8da52930..e3a8da9d 100644 --- a/Justfile +++ b/Justfile @@ -85,12 +85,18 @@ dyadt-conformance: dyadt-test: @bash scripts/tests/wave4-dyadt-test.sh +# Structural lint for per-language testing guides (required sections + R1..R9) +language-guides-check: + @bash scripts/check-language-guide.sh + # Aggregate compliance gate: registry drift is the HARD gate (registry-check, # a hard dep). The RSR self-audit is INFORMATIONAL — a monorepo is not expected # to score Gold — but a *broken* audit (exit 4 / unexpected) must fail loudly # rather than pass silently under a blanket `|| true` (Wave-0 false-green fix). validate: registry-check @echo "=== validate: registry drift (HARD GATE) — passed as a dependency above ===" + @echo "=== validate: per-language testing guides (structural, HARD GATE) ===" + @bash scripts/check-language-guide.sh @echo "=== validate: RSR self-audit (INFORMATIONAL grade; errors fail loudly) ===" @bash scripts/rsr-selfaudit.sh . 
@echo "=== validate: done ===" diff --git a/scripts/check-language-guide.sh b/scripts/check-language-guide.sh new file mode 100755 index 00000000..b16de3cb --- /dev/null +++ b/scripts/check-language-guide.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MPL-2.0 +# +# check-language-guide.sh — structural lint for per-language testing guides. +# +# Every guide built from templates/language-testing-guide-TEMPLATE.md MUST carry +# the required sections and the R1–R9 requirement-mapping table. A guide that +# silently omits a section (e.g. "Known gaps") is a false-completeness hole — +# this fails loudly instead. +# +# Usage: check-language-guide.sh [guide.md ...] +# With no args, checks every standards/*-testing-guide.md. +# Exit: 0 all valid (or no guides found) · 1 a guide failed a check (file not found, missing section, no R1/R9 reference, or no SPDX header) + +set -uo pipefail + +ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" + +REQUIRED_SECTIONS=( + "## Requirement mapping" + "## Tools" + "## Recommended CI pipeline" + "## Best practices" + "## Known gaps" + "## Resources" +) + +check_one() { # file + local f="$1" rc=0 sec + if [ ! -f "$f" ]; then echo " ❌ $f: not found"; return 1; fi + for sec in "${REQUIRED_SECTIONS[@]}"; do + grep -Fqx "$sec" "$f" || { echo " ❌ $(basename "$f"): missing section '$sec'"; rc=1; } + done + # The requirement mapping MUST reference the R1..R9 rows (at least R1 and R9). + grep -Eq '\bR1\b' "$f" && grep -Eq '\bR9\b' "$f" || { echo " ❌ $(basename "$f"): requirement mapping does not reference R1..R9"; rc=1; } + # An SPDX header is required. 
+ head -3 "$f" | grep -q 'SPDX-License-Identifier' || { echo " ❌ $(basename "$f"): missing SPDX header"; rc=1; } + [ "$rc" -eq 0 ] && echo " ✅ $(basename "$f")" + return $rc +} + +if [ "$#" -gt 0 ]; then + files=("$@") +else + mapfile -t files < <(ls "$ROOT"/standards/*-testing-guide.md 2>/dev/null) +fi + +if [ "${#files[@]}" -eq 0 ]; then + echo "no language testing guides found (standards/*-testing-guide.md)"; exit 0 +fi + +rc=0 +echo "Language testing guides:" +for f in "${files[@]}"; do check_one "$f" || rc=1; done +exit $rc diff --git a/scripts/tests/wave5-language-guides-test.sh b/scripts/tests/wave5-language-guides-test.sh new file mode 100755 index 00000000..8fcf81c6 --- /dev/null +++ b/scripts/tests/wave5-language-guides-test.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MPL-2.0 +set -uo pipefail +# +# Wave-5 regression: the per-language testing guide lint must accept a +# template-conformant guide and reject one missing a required section. + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +CHK="$ROOT/scripts/check-language-guide.sh" +TMP="$(mktemp -d)"; trap 'rm -rf "$TMP"' EXIT + +pass=0 fail=0 +ok() { echo " ✅ $1"; pass=$((pass + 1)); } +bad() { echo " ❌ $1"; fail=$((fail + 1)); } + +echo "== real guides pass ==" +bash "$CHK" >/dev/null 2>&1 && ok "estate guides pass structural lint" || bad "estate guides failed lint" +bash "$CHK" "$ROOT/standards/affinescript-testing-guide.md" >/dev/null 2>&1 && ok "affinescript guide valid" || bad "affinescript guide invalid" + +echo "== rejects incomplete guides ==" +# missing a required section +g="$TMP/foo-testing-guide.md" +printf '\n# Foo\n## Requirement mapping\nR1 ... 
R9 ...\n## Tools\n## Recommended CI pipeline\n## Best practices\n## Resources\n' > "$g" +bash "$CHK" "$g" >/dev/null 2>&1 && bad "missing 'Known gaps' not caught" || ok "missing section rejected" +# missing SPDX header +g2="$TMP/bar-testing-guide.md" +printf '# Bar\n## Requirement mapping\nR1 R9\n## Tools\n## Recommended CI pipeline\n## Best practices\n## Known gaps\n## Resources\n' > "$g2" +bash "$CHK" "$g2" >/dev/null 2>&1 && bad "missing SPDX not caught" || ok "missing SPDX rejected" +# missing R1..R9 reference +g3="$TMP/baz-testing-guide.md" +printf '\n# Baz\n## Requirement mapping\nno numbers here\n## Tools\n## Recommended CI pipeline\n## Best practices\n## Known gaps\n## Resources\n' > "$g3" +bash "$CHK" "$g3" >/dev/null 2>&1 && bad "missing R1..R9 not caught" || ok "missing R1..R9 rejected" + +echo "== the stale duplicate snapshot is gone ==" +[ ! -f "$ROOT/standards/language-testing-standards-v1.0.0-2024-04-14.md" ] && ok "duplicate snapshot removed" || bad "duplicate snapshot still present" +echo "== the standard is v2.0.0 with RFC-2119 ==" +grep -q 'Version:\*\* 2.0.0' "$ROOT/standards/language-testing-standards.md" && grep -qi 'RFC-2119' "$ROOT/standards/language-testing-standards.md" && ok "standard refreshed to v2.0.0 RFC-2119" || bad "standard not refreshed" + +echo +echo "Wave-5 language-guides regression: $pass passed, $fail failed" +[ "$fail" -eq 0 ] diff --git a/standards/affinescript-testing-guide.md b/standards/affinescript-testing-guide.md new file mode 100644 index 00000000..cfccaf1d --- /dev/null +++ b/standards/affinescript-testing-guide.md @@ -0,0 +1,109 @@ + +# AffineScript Testing Tools Guide + +**Version:** 1.0.0 +**Date:** 2026-07-03 +**Status:** Active (baseline — honest by construction) +**Parent standard:** `language-testing-standards.md` (R1–R9) + +AffineScript is the estate's primary application language (RS/TS/JS → +AffineScript → typed-wasm; affine/linear types, OCaml-based compiler). 
This +guide is the estate's current best statement of its testing story. AffineScript's +tooling is young, so several rows below are honest **gaps**, not omissions — a +gap here is a tracked piece of work, and this guide names it rather than +pretending coverage exists. + +**Canonical SSOT (prospective):** the authoritative home for this guide will be +`hyperpolymath/affinescript` (`spec/` or `docs/testing.adoc`). Until that lands, +this repo carries it; the migration is a Wave-6 charter. Do not let the two +diverge — when the affinescript-repo version ships, this becomes a pointer. + +## Requirement mapping + +| # | Requirement | Level | Tool | CI invocation | Status | +|---|---|---|---|---|---| +| R1 | Unit test runner | MUST | `affinescript-deno-test` bootstrap runner | `deno task test` (in the AS repo) | partial — bootstrap shim, self-hosting pending | +| R2 | Formatter (check mode) | MUST | `affinescript fmt` (compiler subcommand) | `affinescript fmt --check` | gap — formatter not yet shipped | +| R3 | Linter / static analysis | MUST | compiler diagnostics + `affinescript-verify.yml` | `affinescript compile --check` | partial — the type system IS the primary static check; a dedicated linter is a gap | +| R4 | Coverage | SHOULD | `none` | — | gap | +| R5 | Property-based / fuzz | SHOULD | `none` | — | gap (parser/lowering are the priority targets) | +| R6 | Benchmark | SHOULD | `none` | — | gap (no wasm bench harness yet) | +| R7 | Security / dependency audit | MUST | Deno (`deno.json` import audit) | `deno task audit` | partial — Deno-managed deps; no AS-native audit | +| R8 | Contract / pre-post | MAY | affine/linear types (compile-time) | compiler | partial — linearity is a compile-time contract | +| R9 | Proof check | MUST\* | Idris2 ABI proofs (for proven-backed modules) | ECHIDNA proof gate | partial — applies to modules using the `proven` library | + +`MUST*` = R7 applies (Deno ecosystem); R9 applies only to AS modules that call +proven/Idris2-verified code. 
+ +## Tools + +### AffineScript compiler (`affinescript`) — R2, R3, R8 +- **Purpose:** the type checker is the primary correctness gate. Affine/linear + types reject use-after-move and aliasing at compile time — that is R8 + (contract) discharged by construction, and much of R3 (static analysis). +- **Usage:** `affinescript compile <file>.affine` (type-checks + lowers to + typed-wasm); `affinescript compile --check` for check-only. +- **CI:** `.github/workflows/affinescript-verify.yml` clones + builds the + compiler and runs verification. **Note:** that job is currently *advisory* + (`continue-on-error`) while the compiler build stabilises — it does not yet + gate. Promotion to blocking is the unblock condition for R3. + +### affinescript-deno-test — R1 +- **Purpose:** the bootstrap test runner used until AffineScript self-hosts its + test framework. TS/JS shim (documented carve-out). +- **Usage:** `deno task test` in the AS repo. +- **CI:** runs in the AffineScript repo's CI. + +## Recommended CI pipeline + +Until the AS-native toolchain matures, the recommended pipeline is: + +1. **Type-check (R3/R8, MUST):** build the compiler, `affinescript compile + --check` over all `.affine` sources — SHA-pinned, and **blocking once the + compiler build is reliably green** (today advisory; see `affinescript-verify.yml`). +2. **Unit tests (R1, MUST):** `deno task test` via the bootstrap runner. +3. **Dep audit (R7, MUST):** Deno import audit. +4. SHOULD rows (coverage, property, bench) are tracked gaps — see below. + +No `continue-on-error` on a MUST check once its tool is stable; the current +advisory status of `affinescript-verify.yml` is itself a tracked gap, not a +silent pass. + +## Best practices + +1. Design modules to admit affine/linear typing from the start — the type system + is the cheapest test you have. +2. Prefer compile-time linearity contracts (R8) over runtime assertions where the + type system can express the invariant. +3. 
For correctness-critical paths, route through `proven`/Idris2 (R9) rather than + hand-rolled checks. +4. Keep `.affine` sources free of TS/JS shims except the documented bootstrap + carve-outs. + +## Known gaps + +Honest inventory (every gap is real work, not an omission): + +- **R2 formatter** — no `affinescript fmt` yet. Charter. +- **R3 dedicated linter** — beyond type diagnostics; and `affinescript-verify.yml` + is advisory (`continue-on-error`), so R3 does not yet *gate*. Charter: flip to + blocking once the compiler build is reliably green. +- **R4 coverage** — no wasm coverage tool. Charter. +- **R5 property/fuzz** — none; parser and canonical-lowering are the priority + targets. Charter. +- **R6 benchmark** — no wasm bench harness. Charter. +- **R1 self-hosting** — the test runner is a TS/JS bootstrap shim, not AS-native. + Unblocks when AffineScript self-hosts the runner. + +These gaps are why AffineScript's Toolchain Readiness Grade cannot yet exceed the +lower bands — which is the honest position, and the reason this guide exists. + +## Resources + +- `language-testing-standards.md` — the parent R1–R9 standard. +- `.github/workflows/affinescript-verify.yml` — the current (advisory) CI check. +- `templates/language-testing-guide-TEMPLATE.md` — the skeleton this follows. +- SSOT (prospective): `hyperpolymath/affinescript`. + +**Maintainers:** @hyperpolymath +**Last Updated:** 2026-07-03 diff --git a/standards/language-testing-standards-v1.0.0-2024-04-14.md b/standards/language-testing-standards-v1.0.0-2024-04-14.md deleted file mode 100644 index fbf022c7..00000000 --- a/standards/language-testing-standards-v1.0.0-2024-04-14.md +++ /dev/null @@ -1,165 +0,0 @@ -# Language Testing Standards - -**Version:** 1.0.0 -**Date:** 2024-04-14 -**Status:** Active - -This document establishes canonical testing standards for all programming languages used across our projects. - -## Table of Contents - -1. [Rust](#rust) -2. [Julia](#julia) -3. 
[Version Control](#version-control) - -## Rust - -### Core Tools - -| Tool | Purpose | Integration | CI/CD Stage | -|------|---------|-------------|-------------| -| `rustfmt` | Code formatting | ✅ Integrated | Check | -| `clippy` | Linting | ✅ Integrated | Check | -| `cargo audit` | Security auditing | ✅ Integrated | Security | -| `cargo test` | Unit testing | ✅ Integrated | Test | -| `cargo bench` | Benchmarking | ✅ Integrated | Test | - -### Current Implementation - -**GitHub Actions:** `rust-ci.yml` -- Format checking: `cargo fmt --all -- --check` -- Clippy linting: `cargo clippy --all-targets --all-features -- -D warnings` -- Security audit: `cargo audit` -- Test coverage: `cargo tarpaulin` - -**GitLab CI:** `.gitlab-ci.yml` -- Format checking: `cargo fmt --all -- --check` -- Clippy linting: `cargo clippy --all -- -D warnings` -- Security audit: `cargo audit` -- Additional security: `cargo-geiger`, `cargo-license` - -### Best Practices - -1. **Format on save**: Configure editors to run `rustfmt` on file save -2. **Warnings as errors**: Use `-D warnings` flag to treat warnings as errors -3. **Regular audits**: Run `cargo audit` weekly minimum -4. 
**Coverage targets**: Maintain >80% test coverage - -## Julia - -### Equivalent Tools - -| Julia Tool | Rust Equivalent | Purpose | Integration Status | -|-----------|-----------------|---------|-------------------| -| `JuliaFormatter.jl` | `rustfmt` | Code formatting | ❌ Not yet integrated | -| `JET.jl` | `clippy` | Static analysis | ❌ Not yet integrated | -| `Aqua.jl` | `cargo audit` | Package security | ❌ Not yet integrated | -| `Pkg.test()` | `cargo test` | Unit testing | ✅ Integrated | -| `BenchmarkTools.jl` | `cargo bench` | Benchmarking | ❌ Not yet integrated | - -### Recommended Julia CI/CD Integration - -```yaml -# Julia GitHub Actions Example -name: Julia CI -on: [push, pull_request] - -jobs: - format: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - - name: Format check - run: | - julia --project=docs -e ' - using JuliaFormatter - JuliaFormatter.format("."; verbose=true, overwrite=false) - ' - - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - - name: Static analysis - run: | - julia --project=docs -e ' - using JET - JET.test_package(path=".") - ' - - security: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - - name: Package security audit - run: | - julia --project=docs -e ' - using Aqua - Aqua.test_all(deps=true) - ' - - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - - name: Run tests - run: julia --project=. -e 'using Pkg; Pkg.test()' -``` - -### Julia Best Practices - -1. **Project.toml**: Always include proper dependency specification -2. **Test coverage**: Use `Coverage.jl` for coverage reports -3. **Documentation**: Use `Documenter.jl` for doc generation -4. 
**CI templates**: Use `julia-actions/setup-julia` GitHub action - -## Version Control - -### Git Standards - -- **Commit messages**: Follow [Conventional Commits](https://www.conventionalcommits.org/) -- **Branch naming**: `feature/`, `fix/`, `docs/`, `refactor/` prefixes -- **Pull requests**: Require approval from 2 maintainers -- **Semantic versioning**: Follow [SemVer 2.0.0](https://semver.org/) - -### Git Hooks - -Recommended hooks for all repositories: - -```bash -# pre-commit: Run formatters and linters -# pre-push: Run tests -# commit-msg: Validate commit message format -``` - -## Implementation Roadmap - -### Phase 1: Documentation (✅ Complete) -- [x] Create canonical language standards document -- [x] Document current Rust implementation -- [x] Document recommended Julia implementation - -### Phase 2: Julia Integration -- [ ] Add JuliaFormatter to JuliaPackage-Reuse-Audit.jl -- [ ] Add JET.jl static analysis -- [ ] Add Aqua.jl security checks -- [ ] Update CI/CD pipelines - -### Phase 3: Monitoring -- [ ] Set up regular audit scheduling -- [ ] Create compliance dashboard -- [ ] Establish metrics tracking - -## Maintenance - -**Review cycle**: Quarterly -**Next review**: 2024-07-14 -**Maintainers**: @hyperpolymath/core-team - -## Changelog - -**1.0.0 (2024-04-14)**: Initial release with Rust and Julia standards \ No newline at end of file diff --git a/standards/language-testing-standards.md b/standards/language-testing-standards.md index fbf022c7..6c0e651f 100644 --- a/standards/language-testing-standards.md +++ b/standards/language-testing-standards.md @@ -1,165 +1,122 @@ + # Language Testing Standards -**Version:** 1.0.0 -**Date:** 2024-04-14 -**Status:** Active +**Version:** 2.0.0 +**Date:** 2026-07-03 +**Status:** Active (supersedes v1.0.0, 2024-04-14) + +This document establishes the estate's canonical, **conformance-graded** testing +standards for every programming language in the CCCP language policy. Keywords +**MUST / SHOULD / MAY** are RFC-2119. 
+ +It sits above the per-language guides: this document says *what every language's +testing story MUST provide*; each per-language guide (built from +`templates/language-testing-guide-TEMPLATE.md`) says *which concrete tools +provide it*. The requirement categories align with the CRG test taxonomy in +`testing-and-benchmarking/TESTING-TAXONOMY.adoc`, so a language's testing +maturity maps onto its Component/Toolchain Readiness Grade. + +## Conformance requirements (every approved language) + +A language's testing story is **conformant** when its per-language guide names a +concrete, CI-runnable tool for each MUST row, and records `none` *visibly* where +it genuinely cannot (never a silent gap). + +| # | Requirement | Level | CRG category | +|---|---|---|---| +| R1 | A **unit test** runner MUST exist and run in CI on every push/PR. | MUST | Unit tests | +| R2 | A **formatter** MUST exist and be checkable in CI (fail on unformatted). | MUST | hygiene | +| R3 | A **linter / static analyser** MUST exist and run in CI. | MUST | Aspect (correctness) | +| R4 | A **coverage** tool SHOULD run in CI and report a number. | SHOULD | Unit tests | +| R5 | A **property-based / fuzz** facility SHOULD exist for parsers and pure logic. | SHOULD | Property-based (P2P) | +| R6 | A **benchmark** facility SHOULD exist; regressions SHOULD gate for perf-critical code. | SHOULD | Benchmarks | +| R7 | A **security / dependency audit** MUST run for languages with a package ecosystem. | MUST | Aspect (security) | +| R8 | **Contract / pre-post** checks MAY be expressed where the language supports them. | MAY | Contract | +| R9 | For formally-verifiable languages, **proofs** MUST be checked in CI (no hollow proof claims). | MUST\* | proof gate | + +`MUST*` = applies only to languages whose role includes formal verification +(Idris2, Agda, Rust/SPARK). The `spark-theatre-gate.yml` workflow already +enforces "no hollow SPARK proof claims"; R9 generalises that stance. 
+ +**Anti-theatre rule (all requirements):** a testing job that cannot fail is not +a test. A MUST check MUST NOT sit behind `continue-on-error` without a +documented, blocking equivalent elsewhere. Coverage numbers MUST be *reported +with an artifact*, never merely asserted. (See the Wave-0/1 false-green +remediation.) + +## Per-language guides (required set) + +Each approved language SHOULD publish a guide from +`templates/language-testing-guide-TEMPLATE.md`. Priority tracks estate centrality: + +| Language | Guide | Status | +|---|---|---| +| Rust/SPARK | this document §Rust + SPARK proof gate | present | +| Julia | `julia-testing-tools-guide.md` | present (v1.0.0 — R1–R9 refresh tracked) | +| **AffineScript** | `affinescript-testing-guide.md` | **present** — canonical SSOT migrates to `hyperpolymath/affinescript` prospectively | +| Zig | — | charter | +| Elixir + Gleam (BEAM) | — | charter | +| Idris2 / Agda (proofs) | — | charter (ties to proof-debt epic #124) | + +New guides MUST pass `scripts/check-language-guide.sh` (wired into `just +validate`), which fails if a guide omits a required section. + +## Rust/SPARK + +| Requirement | Tool | CI invocation | +|---|---|---| +| R1 unit | `cargo test` | `cargo test --all` | +| R2 format | `rustfmt` | `cargo fmt --all -- --check` | +| R3 lint | `clippy` | `cargo clippy --all-targets --all-features -- -D warnings` | +| R4 coverage | `cargo tarpaulin` / `cargo llvm-cov` | reports % in CI | +| R5 property | `proptest` / `quickcheck` | in the test suite | +| R6 bench | `criterion` | `cargo bench` | +| R7 audit | `cargo audit` | weekly minimum | +| R9 proof | Rust/SPARK | `spark-theatre-gate.yml` (no hollow proof claims) | + +Reusable workflow: `rust-ci-reusable.yml`. Warnings are errors (`-D warnings`); +coverage SHOULD be ≥ 80%. -This document establishes canonical testing standards for all programming languages used across our projects. - -## Table of Contents - -1. [Rust](#rust) -2. [Julia](#julia) -3. 
[Version Control](#version-control) - -## Rust +## Julia -### Core Tools +Concrete tools live in `julia-testing-tools-guide.md`. Requirement mapping: -| Tool | Purpose | Integration | CI/CD Stage | -|------|---------|-------------|-------------| -| `rustfmt` | Code formatting | ✅ Integrated | Check | -| `clippy` | Linting | ✅ Integrated | Check | -| `cargo audit` | Security auditing | ✅ Integrated | Security | -| `cargo test` | Unit testing | ✅ Integrated | Test | -| `cargo bench` | Benchmarking | ✅ Integrated | Test | +| Requirement | Tool | +|---|---| +| R1 unit | `Pkg.test()` | +| R2 format | `JuliaFormatter.format("."; overwrite=false)` | +| R3 lint | `JET.test_package(".")` | +| R4 coverage | `Coverage.jl` | +| R6 bench | `BenchmarkTools.@benchmark` | +| R7 audit | `Aqua.test_all(deps=true)` (ambiguities, deps-compat, project-extras) | -### Current Implementation +> The Julia guide is v1.0.0 (2024) and predates this RFC-2119 framing; refreshing +> it to the R1–R9 mapping (and flipping its "not yet integrated" rows) is tracked +> estate work. -**GitHub Actions:** `rust-ci.yml` -- Format checking: `cargo fmt --all -- --check` -- Clippy linting: `cargo clippy --all-targets --all-features -- -D warnings` -- Security audit: `cargo audit` -- Test coverage: `cargo tarpaulin` +## AffineScript -**GitLab CI:** `.gitlab-ci.yml` -- Format checking: `cargo fmt --all -- --check` -- Clippy linting: `cargo clippy --all -- -D warnings` -- Security audit: `cargo audit` -- Additional security: `cargo-geiger`, `cargo-license` +The estate's primary application language. See `affinescript-testing-guide.md` +for the full guide; requirement mapping summarised there. Canonical SSOT will +move to `hyperpolymath/affinescript` prospectively (charter) — until then this +repo carries the guide. -### Best Practices +## Version control & CI hygiene (all languages) -1. **Format on save**: Configure editors to run `rustfmt` on file save -2. 
**Warnings as errors**: Use `-D warnings` flag to treat warnings as errors -3. **Regular audits**: Run `cargo audit` weekly minimum -4. **Coverage targets**: Maintain >80% test coverage +- All CI actions MUST be SHA-pinned (governance workflow-lint + + `hooks/validate-sha-pins.sh`). +- Commit messages SHOULD follow Conventional Commits; SemVer 2.0.0 for releases. +- Pre-commit/pre-push hooks are installed via `just hooks-install`. -## Julia +## Resources -### Equivalent Tools - -| Julia Tool | Rust Equivalent | Purpose | Integration Status | -|-----------|-----------------|---------|-------------------| -| `JuliaFormatter.jl` | `rustfmt` | Code formatting | ❌ Not yet integrated | -| `JET.jl` | `clippy` | Static analysis | ❌ Not yet integrated | -| `Aqua.jl` | `cargo audit` | Package security | ❌ Not yet integrated | -| `Pkg.test()` | `cargo test` | Unit testing | ✅ Integrated | -| `BenchmarkTools.jl` | `cargo bench` | Benchmarking | ❌ Not yet integrated | - -### Recommended Julia CI/CD Integration - -```yaml -# Julia GitHub Actions Example -name: Julia CI -on: [push, pull_request] - -jobs: - format: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - - name: Format check - run: | - julia --project=docs -e ' - using JuliaFormatter - JuliaFormatter.format("."; verbose=true, overwrite=false) - ' - - lint: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - - name: Static analysis - run: | - julia --project=docs -e ' - using JET - JET.test_package(path=".") - ' - - security: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - - name: Package security audit - run: | - julia --project=docs -e ' - using Aqua - Aqua.test_all(deps=true) - ' - - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - - name: Run tests - run: julia --project=. 
-e 'using Pkg; Pkg.test()' -``` - -### Julia Best Practices - -1. **Project.toml**: Always include proper dependency specification -2. **Test coverage**: Use `Coverage.jl` for coverage reports -3. **Documentation**: Use `Documenter.jl` for doc generation -4. **CI templates**: Use `julia-actions/setup-julia` GitHub action - -## Version Control - -### Git Standards - -- **Commit messages**: Follow [Conventional Commits](https://www.conventionalcommits.org/) -- **Branch naming**: `feature/`, `fix/`, `docs/`, `refactor/` prefixes -- **Pull requests**: Require approval from 2 maintainers -- **Semantic versioning**: Follow [SemVer 2.0.0](https://semver.org/) - -### Git Hooks - -Recommended hooks for all repositories: - -```bash -# pre-commit: Run formatters and linters -# pre-push: Run tests -# commit-msg: Validate commit message format -``` - -## Implementation Roadmap - -### Phase 1: Documentation (✅ Complete) -- [x] Create canonical language standards document -- [x] Document current Rust implementation -- [x] Document recommended Julia implementation - -### Phase 2: Julia Integration -- [ ] Add JuliaFormatter to JuliaPackage-Reuse-Audit.jl -- [ ] Add JET.jl static analysis -- [ ] Add Aqua.jl security checks -- [ ] Update CI/CD pipelines - -### Phase 3: Monitoring -- [ ] Set up regular audit scheduling -- [ ] Create compliance dashboard -- [ ] Establish metrics tracking - -## Maintenance - -**Review cycle**: Quarterly -**Next review**: 2024-07-14 -**Maintainers**: @hyperpolymath/core-team +- `testing-and-benchmarking/TESTING-TAXONOMY.adoc` — the CRG test taxonomy. +- `templates/language-testing-guide-TEMPLATE.md` — the per-language skeleton. +- `component-readiness-grades/` · `toolchain-readiness-grades/` — testing → grade. 
## Changelog -**1.0.0 (2024-04-14)**: Initial release with Rust and Julia standards \ No newline at end of file +- **2.0.0 (2026-07-03)**: RFC-2119 conformance requirements (R1–R9) mapped to the + CRG taxonomy; per-language guide template + required set; AffineScript added; + anti-theatre rule; removed the stale 2024 roadmap/duplicate snapshot. +- **1.0.0 (2024-04-14)**: Initial release (Rust + Julia). diff --git a/standards/templates/language-testing-guide-TEMPLATE.md b/standards/templates/language-testing-guide-TEMPLATE.md new file mode 100644 index 00000000..5c7391a2 --- /dev/null +++ b/standards/templates/language-testing-guide-TEMPLATE.md @@ -0,0 +1,66 @@ + + +# <Language> Testing Tools Guide + +**Version:** 1.0.0 +**Date:** <YYYY-MM-DD> +**Status:** <Draft | Active> +**Parent standard:** `language-testing-standards.md` (R1–R9) + +One-paragraph purpose: what this guide covers and who maintains it. + +## Requirement mapping + +The single most important table: map each requirement from +`language-testing-standards.md` to a CONCRETE, CI-runnable tool, or the literal +`none` (visible, never a silent gap). A `none` on a MUST row is a real gap. + +| # | Requirement | Level | Tool | CI invocation | Status | +|---|---|---|---|---|---| +| R1 | Unit test runner | MUST | `<tool>` | `<command>` | pass / gap | +| R2 | Formatter (check mode) | MUST | `<tool>` | `<command>` | pass / gap | +| R3 | Linter / static analysis | MUST | `<tool>` | `<command>` | pass / gap | +| R4 | Coverage | SHOULD | `<tool>` | `<command>` | pass / gap | +| R5 | Property-based / fuzz | SHOULD | `<tool>` | `<command>` | pass / gap | +| R6 | Benchmark | SHOULD | `<tool>` | `<command>` | pass / gap | +| R7 | Security / dependency audit | MUST\* | `<tool>` | `<command>` | pass / gap | +| R8 | Contract / pre-post | MAY | `<tool>` | `<command>` | pass / gap | +| R9 | Proof check | MUST\* | `<tool>` | `<command>` | pass / gap | + +`MUST*` = R7 applies only if the language has a package ecosystem; R9 only if the +language's role includes formal verification. + +## Tools + +For each tool named above: purpose, install, minimal usage, and the exact CI +step. 
Keep it copy-pasteable. + +### <Tool name> (<requirement IDs>) +- **Purpose:** … +- **Install:** … +- **Usage:** … +- **CI:** … + +## Recommended CI pipeline + +A single, SHA-pinned, copy-pasteable CI workflow that runs the MUST rows as +blocking and the SHOULD rows as reported. No `continue-on-error` on a MUST check. + +## Best practices + +Language-specific conventions (project layout, coverage target, warnings-as-errors). + +## Known gaps + +List every `none`/`gap` from the requirement mapping with the reason and, if +tracked, the issue/charter. This section MUST be honest — an empty "Known gaps" +means every requirement is genuinely met. + +## Resources + +Links to the tools' upstreams and the parent standard. + +**Maintainers:** <@owner> +**Last Updated:** <YYYY-MM-DD> From a073125941fb41a65bf96741e5b65916a8323c83 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 3 Jul 2026 02:32:41 +0000 Subject: [PATCH 2/2] feat(wave6): canonical-names guard, DYADT residual fix, licence-flag record MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Capstone wave: close the last pull-forward holes and record the flag-only licence discipline. (Campaign umbrella #460.) - scripts/verify-claims.sh: close the residual DYADT bypass from the adversarial review (#461) — an always-matching `contains:` regex (`.*`, `^`, `$`, `.`, `.+`) no longer confirms vacuously; it now requires a literal character and returns `unverifiable trivial-pattern` otherwise. CLAIM-FORMAT.adoc pins the `contains:` dialect to POSIX ERE and states the trivial-pattern rule; a conformance vector (always-match) + a wave4 assertion lock it (10 vectors, 15 dyadt assertions). - scripts/check-canonical-names.sh: reintroduction guard for the deprecated names (6a2 -> descriptiles, agent_instructions -> bot_directives) from the CANONICAL-NAMES.adoc mandate. Inspects only ADDED diff lines so the chartered bulk migration of existing occurrences is untouched; skips meta-references (lines that describe the deprecation). 
Wired into `just validate` and the pre-commit hook. scripts/tests/wave6-canonical-names-test.sh (4/4). - audits/licence-flags-2026-07.adoc: flag-only record — the whole program made no SPDX edits and no auto licence PRs; DYADT treats licence claims as manual-only end to end. Two owner-review flags surfaced, none acted on. CHANGELOG is git-cliff-generated (not hand-edited); the first tagged release + the rand<0.9.3 advisory expiry are owner actions tracked under #460. Orphan A2ML-ATOMICITY-PROTOCOL registration + REORGANIZATION-PLAN archival deferred to #460 as minor cleanup. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_0114ps6mY5jAH4Sz --- .machine_readable/REGISTRY.a2ml | 2 +- Justfile | 6 ++ audits/licence-flags-2026-07.adoc | 49 +++++++++++++ .../spec/CLAIM-FORMAT.adoc | 10 +++ .../spec/conformance/always-match.a2ml | 10 +++ .../spec/conformance/always-match.expected | 1 + hooks/pre-commit | 11 +++ scripts/check-canonical-names.sh | 70 +++++++++++++++++++ scripts/tests/wave4-dyadt-test.sh | 10 +++ scripts/tests/wave6-canonical-names-test.sh | 39 +++++++++++ scripts/verify-claims.sh | 5 ++ 11 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 audits/licence-flags-2026-07.adoc create mode 100644 did-you-actually-do-that/spec/conformance/always-match.a2ml create mode 100644 did-you-actually-do-that/spec/conformance/always-match.expected create mode 100755 scripts/check-canonical-names.sh create mode 100755 scripts/tests/wave6-canonical-names-test.sh diff --git a/.machine_readable/REGISTRY.a2ml b/.machine_readable/REGISTRY.a2ml index 2c95e67a..8e76eed6 100644 --- a/.machine_readable/REGISTRY.a2ml +++ b/.machine_readable/REGISTRY.a2ml @@ -234,7 +234,7 @@ name = "DYADT — Did-You-Actually-Do-That" stream = "governance" home = "did-you-actually-do-that/" canonical_doc = "did-you-actually-do-that/README.adoc" -source_hash = "sha256:2ae635b9ede51e76781cb7c171108f2a4505b0aae9ac97fb05c910915141eb2a" +source_hash = 
"sha256:453bf00d0dfac71576b5e7b4068fb8987abc3337d4bc3bf75c081e0332ae1dff" route = "post-action agent-claim verification (Tier 4 accountability)" [[spec]] diff --git a/Justfile b/Justfile index e3a8da9d..4835a86a 100644 --- a/Justfile +++ b/Justfile @@ -89,6 +89,10 @@ dyadt-test: language-guides-check: @bash scripts/check-language-guide.sh +# Block reintroduction of deprecated names (6a2, agent_instructions) in new diff +canonical-names-check base="origin/main": + @bash scripts/check-canonical-names.sh "{{base}}" + # Aggregate compliance gate: registry drift is the HARD gate (registry-check, # a hard dep). The RSR self-audit is INFORMATIONAL — a monorepo is not expected # to score Gold — but a *broken* audit (exit 4 / unexpected) must fail loudly @@ -97,6 +101,8 @@ validate: registry-check @echo "=== validate: registry drift (HARD GATE) — passed as a dependency above ===" @echo "=== validate: per-language testing guides (structural, HARD GATE) ===" @bash scripts/check-language-guide.sh + @echo "=== validate: canonical-names reintroduction guard (vs origin/main, else HEAD) ===" + @bash scripts/check-canonical-names.sh @echo "=== validate: RSR self-audit (INFORMATIONAL grade; errors fail loudly) ===" @bash scripts/rsr-selfaudit.sh . @echo "=== validate: done ===" diff --git a/audits/licence-flags-2026-07.adoc b/audits/licence-flags-2026-07.adoc new file mode 100644 index 00000000..571a3c11 --- /dev/null +++ b/audits/licence-flags-2026-07.adoc @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: CC-BY-SA-4.0 +// SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell += Licence Flags — Estate Audit & Optimization (2026-07) +:icons: font + +[.lead] +Flag-only record for the estate audit-and-optimization program (umbrella +`hyperpolymath/standards#460`). Per the Manual-Only licence policy +(`.claude/CLAUDE.md`), this program made **no** licence/SPDX edits and generated +**no** automated licence-change PRs.
This document records that discipline and +flags — for owner review only — anything licence-adjacent that surfaced. + +== Discipline upheld + +* No SPDX headers on existing files were added, changed, or swept in any wave (0, 1, 3, 4, 5, 6). +* New files created by the program carry the SPDX identifier matching the + repo's classification at birth (authoring, not relicensing): scripts → + `MPL-2.0`, prose/specs → `CC-BY-SA-4.0`. This is consistent with the estate + policy that new files may carry the correct SPDX from birth. +* The DYADT verifier (`did-you-actually-do-that/`) treats any licence/SPDX claim + as `manual-only` end to end — it returns `unverifiable`, never auto-`confirmed`, + and the consequence ledger's confirmation rate is explicitly unaffected by + licence verdicts. The Manual-Only policy is preserved *by construction* in the + new tooling. + +== Flags for owner review (no action taken) + +None of the following were edited; they are surfaced for the owner to rule on. + +[cols="1,3", options="header"] +|=== +| Location | Observation (flag-only) + +| `rhodium-standard-repositories/rsr-audit.sh` +| Header carries a dual `SPDX-License-Identifier: MPL-2.0 AND Palimpsest-0.8`. + Palimpsest is a carve-out family; whether this file should carry a + Palimpsest component is an owner ruling, not an audit action. Left untouched. + +| `SECURITY-ADVISORIES.adoc` +| A standing deferred `rand < 0.9.3` advisory is recorded. Not a licence matter, + but flagged alongside release hygiene: renew with an expiry date or bump. + (Tracked under the umbrella's release-hygiene item.) +|=== + +== No sweep, no auto-PR + +This program did not run, and must not be read as authorising, any bulk SPDX +normalisation. Licence drift findings remain FLAG-ONLY and owner-gated, per the +neurophone#99 precedent and the estate licence-policy umbrella.
diff --git a/did-you-actually-do-that/spec/CLAIM-FORMAT.adoc b/did-you-actually-do-that/spec/CLAIM-FORMAT.adoc index 0bdebded..b985b508 100644 --- a/did-you-actually-do-that/spec/CLAIM-FORMAT.adoc +++ b/did-you-actually-do-that/spec/CLAIM-FORMAT.adoc @@ -106,6 +106,16 @@ A verifier MUST reject (`unverifiable`) a claim whose `verifier` is not listed as compatible with its `claim_class` in VERIFICATION-PROTOCOL — an agent MUST NOT be able to choose a weaker verifier than the claim class warrants. +=== `contains:` regex dialect (normative) + +The `contains:` predicate uses **POSIX Extended Regular Expressions +(ERE)**, matched case-sensitively against the whole file (`grep -E`). A pattern +that contains no literal character — i.e. is trivially always-matching (`.*`, +`.`, `^`, `$`, `.+`, and the like) — is NOT evidence and MUST be rejected as +`unverifiable` (reason `trivial-pattern`); it would otherwise confirm any +non-empty file. `stdout-contains:` is a fixed-string (non-regex) substring +match. + == Honesty constraints (normative) . 
A claim's verdict MUST be derived from primary evidence re-collected by the diff --git a/did-you-actually-do-that/spec/conformance/always-match.a2ml b/did-you-actually-do-that/spec/conformance/always-match.a2ml new file mode 100644 index 00000000..297bceb1 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/always-match.a2ml @@ -0,0 +1,10 @@ +[claims] +schema = "dyadt/claim@1" +actor = "conformance" +[[claim]] +id = "C1" +claim_class = "file-changed" +statement = "a vacuous always-matching contains pattern" +target = "README.adoc" +expect = "contains:.*" +verifier = "git-diff" diff --git a/did-you-actually-do-that/spec/conformance/always-match.expected b/did-you-actually-do-that/spec/conformance/always-match.expected new file mode 100644 index 00000000..450c8c22 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/always-match.expected @@ -0,0 +1 @@ +C1 unverifiable diff --git a/hooks/pre-commit b/hooks/pre-commit index c2340b52..7a40ff90 100755 --- a/hooks/pre-commit +++ b/hooks/pre-commit @@ -91,6 +91,17 @@ if [ -f scripts/build-registry.sh ]; then fi fi +# Canonical-names guard: block reintroduction of the deprecated names listed in +# CANONICAL-NAMES.adoc. The guard is run against HEAD, i.e. it diffs the working tree (staged AND unstaged edits), not the index alone. Only added lines are checked, so +# not-yet-migrated existing occurrences do not trip it. +if [ -f scripts/check-canonical-names.sh ]; then + if ! bash scripts/check-canonical-names.sh HEAD >/dev/null 2>&1; then + echo -e "${RED}✗ Deprecated name reintroduced (see CANONICAL-NAMES.adoc):${NC}" + bash scripts/check-canonical-names.sh HEAD 2>&1 | grep '❌' || true + ERRORS=$((ERRORS + 1)) + fi +fi + # Final result echo "" if [ $ERRORS -gt 0 ]; then diff --git a/scripts/check-canonical-names.sh b/scripts/check-canonical-names.sh new file mode 100755 index 00000000..15b230b0 --- /dev/null +++ b/scripts/check-canonical-names.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MPL-2.0 +# +# check-canonical-names.sh — block REINTRODUCTION of deprecated names.
+# +# CANONICAL-NAMES.adoc (owner mandate 2026-06-30) deprecates: +# * 6a2 -> descriptiles +# * agent_instructions -> bot_directives +# The bulk migration of existing occurrences is chartered separately; this guard +# stops NEW occurrences from landing in the meantime. It inspects only the +# ADDED lines of a diff (grandfathered existing text is untouched), so it can be +# wired into pre-commit and CI without tripping on the not-yet-migrated files. +# +# Usage: check-canonical-names.sh [base-ref] +# base-ref default: origin/main if that ref resolves, otherwise HEAD. The diff is working tree vs base-ref. +# Exit: 0 no new deprecated tokens · 1 a deprecated token was added (no usage-error path: the script never exits 2) + +set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +BASE="${1:-}" +if [ -z "$BASE" ]; then + if git rev-parse --verify -q origin/main >/dev/null 2>&1; then BASE="origin/main"; else BASE="HEAD"; fi +fi + +# Deprecated token -> canonical replacement (for the error message). +declare -A REPL=( ["6a2"]="descriptiles" ["agent_instructions"]="bot_directives" ) + +# Files that legitimately NAME the deprecated tokens (the mandate itself, this +# guard, migration/charter docs). Excluded from the check. +is_excluded() { + case "$1" in + CANONICAL-NAMES.adoc|scripts/check-canonical-names.sh|scripts/tests/*|\ + *MIGRATION*|*migration*|*CHANGELOG*|*/6a2/*|.machine_readable/6a2/*) return 0 ;; + esac + return 1 +} + +# Raw `git diff` of the working tree vs BASE. NOTE(review): stderr is discarded and the exit status is not checked, so an unresolvable BASE yields no lines and the guard reports success. +added_diff() { git diff "$BASE" -- . 2>/dev/null; } + +rc=0 +current_file="" +while IFS= read -r line; do + case "$line" in + "+++ b/"*) current_file="${line#+++ b/}" ;; + "+"*) + is_excluded "$current_file" && continue + body="${line#+}" + # Skip a line that is DESCRIBING the deprecation rather than using the old + # name — it also mentions the canonical replacement or the mandate itself + # (e.g. tooling comments, this guard's own wiring, docs about the rename).
+ if printf '%s' "$body" | grep -Eqi 'deprecat|canonical|reintroduc|descriptiles|bot_directives'; then + continue + fi + for tok in "${!REPL[@]}"; do + # word-ish boundary so e.g. 'v6a2ml' style false hits are limited + if printf '%s' "$body" | grep -Eq "(^|[^A-Za-z0-9])$tok([^A-Za-z0-9]|$)"; then + echo "❌ $current_file: reintroduces deprecated '$tok' — use '${REPL[$tok]}' (CANONICAL-NAMES.adoc)" + echo " + $body" + rc=1 + fi + done ;; + esac +done < <(added_diff) + +if [ "$rc" -eq 0 ]; then + echo "✅ no deprecated names reintroduced (vs $BASE)" +fi +exit $rc diff --git a/scripts/tests/wave4-dyadt-test.sh b/scripts/tests/wave4-dyadt-test.sh index 938438a7..d07ed20f 100755 --- a/scripts/tests/wave4-dyadt-test.sh +++ b/scripts/tests/wave4-dyadt-test.sh @@ -166,6 +166,16 @@ target = "echo marker >&2; true" expect = "stdout-contains:marker" verifier = "command-transcript"' [[ "$(reason_of "$TMP/se.a2ml" C1)" == REFUTED* ]] && ok "stderr does not satisfy stdout-contains" || bad "stderr false-confirmed stdout claim" +# an always-matching contains: regex is not evidence (#461) +mk am.a2ml '[claims] +[[claim]] +id = "C1" +claim_class = "file-changed" +target = "README.adoc" +expect = "contains:.*" +verifier = "git-diff"' +[[ "$(reason_of "$TMP/am.a2ml" C1)" == unverifiable*trivial-pattern ]] && ok "always-matching contains: -> unverifiable" || bad "always-match pattern confirmed vacuously" + # licence claim phrased only in the statement is still manual-only mk lic.a2ml '[claims] [[claim]] diff --git a/scripts/tests/wave6-canonical-names-test.sh b/scripts/tests/wave6-canonical-names-test.sh new file mode 100755 index 00000000..d3484d0b --- /dev/null +++ b/scripts/tests/wave6-canonical-names-test.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MPL-2.0 +set -uo pipefail +# +# Wave-6 regression: the canonical-names reintroduction guard must block a NEW +# deprecated token while leaving grandfathered existing occurrences alone. 
+ +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +CHK="$ROOT/scripts/check-canonical-names.sh" + +pass=0 fail=0 +ok() { echo " ✅ $1"; pass=$((pass + 1)); } +bad() { echo " ❌ $1"; fail=$((fail + 1)); } + +cd "$ROOT" + +echo "== guard blocks a newly-added deprecated token ==" +f="wave6_guard_probe.txt" +printf 'this file uses the 6a2 layout\n' > "$f" +git add "$f" 2>/dev/null +if bash "$CHK" HEAD >/dev/null 2>&1; then bad "new '6a2' token not blocked"; else ok "new '6a2' token blocked"; fi +# agent_instructions too +printf 'agent_instructions live here\n' > "$f" +git add "$f" 2>/dev/null +if bash "$CHK" HEAD >/dev/null 2>&1; then bad "new 'agent_instructions' not blocked"; else ok "new 'agent_instructions' blocked"; fi +git reset -q "$f" 2>/dev/null; rm -f "$f" + +echo "== guard passes with no offending additions ==" +printf 'a perfectly canonical descriptiles + bot_directives line\n' > "$f" +git add "$f" 2>/dev/null +bash "$CHK" HEAD >/dev/null 2>&1 && ok "canonical names pass" || bad "canonical names wrongly blocked" +git reset -q "$f" 2>/dev/null; rm -f "$f" + +echo "== the guard excludes CANONICAL-NAMES.adoc itself ==" +grep -q 'CANONICAL-NAMES.adoc' "$CHK" && ok "mandate doc is excluded from the guard" || bad "mandate doc not excluded" + +echo +echo "Wave-6 canonical-names regression: $pass passed, $fail failed" +[ "$fail" -eq 0 ] diff --git a/scripts/verify-claims.sh b/scripts/verify-claims.sh index 86c8ed9e..1149eb1a 100755 --- a/scripts/verify-claims.sh +++ b/scripts/verify-claims.sh @@ -95,6 +95,11 @@ v_git_diff() { # target expect contains:*) local re="${expect#contains:}" [ -n "$re" ] || { echo "unverifiable empty-pattern"; return; } + # Reject a trivially-always-matching regex (`.*`, `^`, `$`, `.`, `.+`, …): + # a pattern with no literal character is not evidence — it confirms any + # non-empty file. Require at least one literal after stripping ERE metachars. 
+ local bare; bare="$(printf '%s' "$re" | tr -d '.^$*+?()[]{}|\\')" + [ -n "$bare" ] || { echo "unverifiable trivial-pattern"; return; } if [ ! -f "$target" ]; then echo "unverifiable not-a-regular-file"; return; fi if [ ! -r "$target" ]; then echo "unverifiable unreadable"; return; fi # distinguish "pattern absent" (refuted) from "bad regex" (unverifiable)