From 7aafc77a110c98ca86688c6c9cd9280b09dab328 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 3 Jul 2026 01:46:44 +0000 Subject: [PATCH] =?UTF-8?q?feat(wave4):=20DYADT=20=E2=80=94=20post-action?= =?UTF-8?q?=20agent-claim=20verification=20(Tier=204)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The estate gated what an agent may do BEFORE it acts (gatekeeper, AGENTIC, contractiles) but had no stage that checks, mechanically, that an agent's CLAIMED outcomes actually happened. DYADT is that missing Tier 4: it takes an agent's asserted outcomes and confirms/refutes each against primary evidence, never trusting the agent's own narration. New registered spec `did-you-actually-do-that/` (governance stream): - README.adoc: the four-tier accountability pipeline (admission → pre-action → in-session gates → post-action verification). - spec/CLAIM-FORMAT.adoc: typed claims (claim_class, target, expect, verifier) + example CLAIMS.a2ml. Licence claims are always manual-only. - spec/VERIFICATION-PROTOCOL.adoc: the verifier taxonomy and the confirmed|refuted|unverifiable verdicts. unverifiable is loud, never green. A verifier must RE-DERIVE evidence, never read back the agent's evidence field. - spec/CONSEQUENCE-LEDGER.adoc: append-only, dual-signed ledger + per-actor confirmation rate that Tier-3 contractiles MAY gate on. - spec/conformance/: 6 executable vectors (+ runner) covering every verdict class — the shared ground truth the production verifier is TDD'd against. - docs/NAMING-RESOLUTION.adoc + CANONICAL-NAMES.adoc: resolve the PLASMA collision (PLASMA = licence/exactness only; claim verification = DYADT). Executable + dogfooded: - scripts/verify-claims.sh: reference verifier (git-diff / command-transcript / claims-compose local verifiers; network + manual return unverifiable). 
- Root CLAIMS.a2ml: 7 claims about THIS change, all re-derived from primary evidence; .github/workflows/dyadt-verify.yml runs the verifier + conformance on push/PR. If a claim here were false, CI refutes it and fails loudly. - scripts/tests/wave4-dyadt-test.sh (7/7): proves a false claim is REFUTED and the incompatible-verifier + manual-only guards fire. - Registered in build-registry.sh (32 specs); honest scorecard added (5/5 MUST met, 90% systems coverage; the network verifier is an honest fail). Production actuator (continuous, wired to hypatia/gitbot-fleet, real ledger enforcement) is chartered for hyperpolymath/did-you-actually-do-that against these conformance vectors. Licence rows manual-only throughout (flag-only). Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_0114ps6mY5jAH4Sz --- .github/workflows/dyadt-verify.yml | 44 +++++ .machine_readable/REGISTRY.a2ml | 11 +- .../did-you-actually-do-that.scorecard.a2ml | 87 ++++++++++ CANONICAL-NAMES.adoc | 17 ++ CLAIMS.a2ml | 68 ++++++++ COMPLIANCE-DASHBOARD.md | 9 +- Justfile | 12 ++ TOPOLOGY.md | 3 +- did-you-actually-do-that/README.adoc | 99 ++++++++++++ .../docs/NAMING-RESOLUTION.adoc | 53 ++++++ .../spec/CLAIM-FORMAT.adoc | 127 +++++++++++++++ .../spec/CONSEQUENCE-LEDGER.adoc | 94 +++++++++++ .../spec/VERIFICATION-PROTOCOL.adoc | 112 +++++++++++++ .../spec/conformance/README.adoc | 20 +++ .../spec/conformance/confirmed-command.a2ml | 10 ++ .../conformance/confirmed-command.expected | 1 + .../spec/conformance/contains-fixture.a2ml | 10 ++ .../conformance/contains-fixture.expected | 1 + .../spec/conformance/fixture.txt | 1 + .../conformance/incompatible-verifier.a2ml | 10 ++ .../incompatible-verifier.expected | 1 + .../spec/conformance/manual-licence.a2ml | 10 ++ .../spec/conformance/manual-licence.expected | 1 + .../spec/conformance/refuted-command.a2ml | 10 ++ .../spec/conformance/refuted-command.expected | 1 + .../spec/conformance/run-conformance.sh | 34 ++++ 
.../conformance/unverifiable-network.a2ml | 10 ++ .../conformance/unverifiable-network.expected | 1 + scripts/build-registry.sh | 1 + scripts/tests/wave4-dyadt-test.sh | 115 +++++++++++++ scripts/verify-claims.sh | 153 ++++++++++++++++++ 31 files changed, 1120 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/dyadt-verify.yml create mode 100644 .machine_readable/scorecards/did-you-actually-do-that.scorecard.a2ml create mode 100644 CLAIMS.a2ml create mode 100644 did-you-actually-do-that/README.adoc create mode 100644 did-you-actually-do-that/docs/NAMING-RESOLUTION.adoc create mode 100644 did-you-actually-do-that/spec/CLAIM-FORMAT.adoc create mode 100644 did-you-actually-do-that/spec/CONSEQUENCE-LEDGER.adoc create mode 100644 did-you-actually-do-that/spec/VERIFICATION-PROTOCOL.adoc create mode 100644 did-you-actually-do-that/spec/conformance/README.adoc create mode 100644 did-you-actually-do-that/spec/conformance/confirmed-command.a2ml create mode 100644 did-you-actually-do-that/spec/conformance/confirmed-command.expected create mode 100644 did-you-actually-do-that/spec/conformance/contains-fixture.a2ml create mode 100644 did-you-actually-do-that/spec/conformance/contains-fixture.expected create mode 100644 did-you-actually-do-that/spec/conformance/fixture.txt create mode 100644 did-you-actually-do-that/spec/conformance/incompatible-verifier.a2ml create mode 100644 did-you-actually-do-that/spec/conformance/incompatible-verifier.expected create mode 100644 did-you-actually-do-that/spec/conformance/manual-licence.a2ml create mode 100644 did-you-actually-do-that/spec/conformance/manual-licence.expected create mode 100644 did-you-actually-do-that/spec/conformance/refuted-command.a2ml create mode 100644 did-you-actually-do-that/spec/conformance/refuted-command.expected create mode 100755 did-you-actually-do-that/spec/conformance/run-conformance.sh create mode 100644 did-you-actually-do-that/spec/conformance/unverifiable-network.a2ml create mode 100644 
did-you-actually-do-that/spec/conformance/unverifiable-network.expected create mode 100755 scripts/tests/wave4-dyadt-test.sh create mode 100755 scripts/verify-claims.sh diff --git a/.github/workflows/dyadt-verify.yml b/.github/workflows/dyadt-verify.yml new file mode 100644 index 00000000..aa4a1593 --- /dev/null +++ b/.github/workflows/dyadt-verify.yml @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: MPL-2.0 +# dyadt-verify — DYADT dogfood. Verify this repo's CLAIMS.a2ml against primary +# evidence, and run the DYADT conformance vector suite. If a claim this change +# makes is false, the verifier REFUTES it and this job fails loudly. +name: DYADT Verify + +on: + push: + branches: [ main, master ] + pull_request: + branches: [ main, master ] + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + verify-claims: + name: Verify CLAIMS.a2ml + conformance + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - name: Checkout repository + uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0 # v7.0.0 + with: + fetch-depth: 0 # need origin/main as the DYADT diff base + + - name: DYADT conformance vectors + run: | + bash did-you-actually-do-that/spec/conformance/run-conformance.sh + + - name: Verify this change's own claims (dogfood) + env: + DYADT_BASE: origin/${{ github.base_ref || 'main' }} + run: | + if [ -f CLAIMS.a2ml ]; then + bash scripts/verify-claims.sh CLAIMS.a2ml + else + echo "No CLAIMS.a2ml at root — nothing to verify." 
+ fi diff --git a/.machine_readable/REGISTRY.a2ml b/.machine_readable/REGISTRY.a2ml index f9bb1974..4109567d 100644 --- a/.machine_readable/REGISTRY.a2ml +++ b/.machine_readable/REGISTRY.a2ml @@ -20,7 +20,7 @@ version = "1.0.0" generator = "scripts/build-registry.sh" hash_algorithm = "sha256(git ls-files -s ) # local; external: recorded pin" -entry_count = 31 +entry_count = 32 [registry.streams] foundation = "A2ML format family + K9 + contractiles (Stream 1)" @@ -228,6 +228,15 @@ canonical_doc = "session-management-standards/README.adoc" source_hash = "sha256:4b1a97d2ef91578b2c262f1af5b93577cb36d53d234c330947538f4eafc01a6c" route = "continuity / verify / handover protocols" +[[spec]] +id = "did-you-actually-do-that" +name = "DYADT — Did-You-Actually-Do-That" +stream = "governance" +home = "did-you-actually-do-that/" +canonical_doc = "did-you-actually-do-that/README.adoc" +source_hash = "sha256:445359ddcc92b56dfc8e8a3bdc16062439f1236b5fd0f42099113e7afa86d2e0" +route = "post-action agent-claim verification (Tier 4 accountability)" + [[spec]] id = "ensaid-config" name = "ENSAID Config" diff --git a/.machine_readable/scorecards/did-you-actually-do-that.scorecard.a2ml b/.machine_readable/scorecards/did-you-actually-do-that.scorecard.a2ml new file mode 100644 index 00000000..70e314a4 --- /dev/null +++ b/.machine_readable/scorecards/did-you-actually-do-that.scorecard.a2ml @@ -0,0 +1,87 @@ +# SPDX-License-Identifier: CC-BY-SA-4.0 +# did-you-actually-do-that.scorecard.a2ml +# Hand-authored source. Regenerate the dashboard with: just scorecards +# Schema: .machine_readable/scorecards/scorecard.schema.json + +[scorecard] +spec_id = "did-you-actually-do-that" +version = "1.0.0" +assessed_date = "2026-07-03" +assessor = "estate-audit" + +[[must]] +id = "M1" +text = "DYADT MUST define a typed, machine-checkable claim format." 
+system = "did-you-actually-do-that/spec/CLAIM-FORMAT.adoc (normative claim classes + required fields)" +status = "pass" +evidence = "spec/CLAIM-FORMAT.adoc present; 7 claim classes and required-field table defined; example CLAIMS.a2ml shipped." +effects = "Without a fixed claim format the parallel production verifier has no contract to build against." + +[[must]] +id = "M2" +text = "DYADT MUST ship a verifier that re-derives outcomes from primary evidence and can REFUTE a false claim." +system = "scripts/verify-claims.sh, exercised by scripts/tests/wave4-dyadt-test.sh" +status = "pass" +evidence = "wave4-dyadt-test.sh (7/7) asserts a false command is REFUTED despite an honest-sounding statement; verifier never reads back the agent's evidence field." +effects = "A verifier that trusted the statement would be theatre — the whole point is mechanical refutation." + +[[must]] +id = "M3" +text = "Verdicts MUST be exactly confirmed|refuted|unverifiable, and unverifiable MUST be loud (non-passing)." +system = "scripts/verify-claims.sh (exit 1 on any refuted or, by default, unverifiable)" +status = "pass" +evidence = "wave4-dyadt-test.sh asserts refuted -> non-zero exit; all-confirmed -> exit 0; VERIFICATION-PROTOCOL.adoc fixes the three-verdict contract." +effects = "Silent 'assumed pass' would reintroduce the false-green disease DYADT exists to cure." + +[[must]] +id = "M4" +text = "A conformance vector suite MUST exist so any verifier can be validated against shared ground truth." +system = "did-you-actually-do-that/spec/conformance/run-conformance.sh (+ 6 vector pairs)" +status = "pass" +evidence = "run-conformance.sh passes 6/6 vectors covering confirmed/refuted/unverifiable/incompatible-verifier/manual-only." +effects = "Without shared vectors the reference and production verifiers could silently diverge." + +[[must]] +id = "M5" +text = "The change introducing DYADT MUST dogfood it: a CLAIMS.a2ml verified in CI." 
+system = ".github/workflows/dyadt-verify.yml runs scripts/verify-claims.sh CLAIMS.a2ml" +status = "pass" +evidence = "Root CLAIMS.a2ml ships 7 claims about this change; dyadt-verify.yml runs the verifier + conformance suite on push/PR." +effects = "If a claim here were false, the verifier refutes it and CI fails loudly — the spec proves itself on itself." + +[[should]] +id = "S1" +text = "DYADT SHOULD specify an append-only, dual-signed consequence ledger and a per-actor confirmation rate." +system = "did-you-actually-do-that/spec/CONSEQUENCE-LEDGER.adoc + .machine_readable/ledger/ format" +status = "pass" +evidence = "CONSEQUENCE-LEDGER.adoc defines the entry format, the confirmation-rate formula, and how Tier 3 MAY gate on it." +effects = "Verification without memory cannot escalate on a repeatedly-over-claiming actor." + +[[should]] +id = "S2" +text = "The PLASMA naming collision SHOULD be resolved and recorded in canon." +system = "did-you-actually-do-that/docs/NAMING-RESOLUTION.adoc + CANONICAL-NAMES.adoc entry" +status = "pass" +evidence = "NAMING-RESOLUTION.adoc splits DYADT (claim verification) from PLASMA (licence/exactness); a DYADT/PLASMA entry is added to CANONICAL-NAMES.adoc." +effects = "Ambiguous 'grounded by PLASMA' references would keep entangling licence exactness with claim verification." + +[[should]] +id = "S3" +text = "The verifier SHOULD implement the network verifiers (ci-run, issue-state, pr-state) against real forge/CI APIs." +system = "none — the reference verifier returns 'unverifiable: needs-network-verifier' for network classes" +status = "fail" +effects = "ci-green / issue-closed / pr-merged claims cannot be confirmed by the reference impl; the production verifier in hyperpolymath/did-you-actually-do-that must implement them (chartered)." + +[[could]] +id = "C1" +text = "Tier-3 contractiles COULD block pre_merge on an actor's DYADT confirmation rate."
+system = "none (ledger-gate.sh is illustrative in CONSEQUENCE-LEDGER.adoc, not implemented)" +status = "aspirational" +effects = "A live consequence gate is the end state; it depends on the production ledger writer existing first." + +[[could]] +id = "C2" +text = "The ledger COULD be a central estate-wide store rather than per-repo." +system = "none" +status = "aspirational" +effects = "Cross-repo actor reputation needs a central ledger; a reach target, not a current gap." diff --git a/CANONICAL-NAMES.adoc b/CANONICAL-NAMES.adoc index 0d1961bb..065c9a8c 100644 --- a/CANONICAL-NAMES.adoc +++ b/CANONICAL-NAMES.adoc @@ -62,6 +62,23 @@ DEPRECATED NAMES — DO NOT REINTRODUCE (estate mandate 2026-06-30): mismatch of broken parts adding to the pain of the agentic experience. It is not an institution. +| **PLASMA** +| Palimpsest licence / exactness tooling (only) +| "the claim-grounder", "the DYADT engine", "agent-claim verifier" +| PLASMA (`palimpsest-plasma`) is licence & exactness tooling *only* — the + authority for licence identity, invariants, and equivalence. It is NOT an + agent-claim verifier. Post-action verification of an agent's asserted + outcomes is *DYADT* (see below), which subsumes the former "claim-grounder" + concept and drops the PLASMA name. See + `did-you-actually-do-that/docs/NAMING-RESOLUTION.adoc`. + +| **DYADT** +| Did-You-Actually-Do-That +| "PLASMA", "the grounder" +| Post-action agent-claim verification (Tier 4 of the accountability pipeline): + takes an agent's asserted outcomes and mechanically confirms/refutes each + against primary evidence. Home: `did-you-actually-do-that/`. + |=== == Deprecated names — do not reintroduce (MANDATE, 2026-06-30) diff --git a/CLAIMS.a2ml b/CLAIMS.a2ml new file mode 100644 index 00000000..391ca0b5 --- /dev/null +++ b/CLAIMS.a2ml @@ -0,0 +1,68 @@ +# SPDX-License-Identifier: CC-BY-SA-4.0 +# CLAIMS.a2ml — DYADT dogfood: what this change claims to have done. 
+# Verified in CI by scripts/verify-claims.sh (did-you-actually-do-that/). +# Every claim below is re-derived from primary evidence, not from this text. +# Claims use content markers (not "created") so they stay true after the change +# merges into the base branch — a claim must describe a durable fact. + +[claims] +schema = "dyadt/claim@1" +session = "session_0114ps6mY5jAH4Sz" +actor = "claude-opus-4-8" +emitted_commit = "HEAD" + +[[claim]] +id = "C1" +claim_class = "file-changed" +statement = "the DYADT front door defines the Tier-4 accountability pipeline" +target = "did-you-actually-do-that/README.adoc" +expect = "contains:Post-Action Claim Verification" +verifier = "git-diff" + +[[claim]] +id = "C2" +claim_class = "file-changed" +statement = "the claim format spec defines typed claim classes" +target = "did-you-actually-do-that/spec/CLAIM-FORMAT.adoc" +expect = "contains:claim_class" +verifier = "git-diff" + +[[claim]] +id = "C3" +claim_class = "file-changed" +statement = "the verification protocol fixes the three-verdict contract" +target = "did-you-actually-do-that/spec/VERIFICATION-PROTOCOL.adoc" +expect = "contains:unverifiable" +verifier = "git-diff" + +[[claim]] +id = "C4" +claim_class = "file-changed" +statement = "the consequence ledger defines a per-actor confirmation rate" +target = "did-you-actually-do-that/spec/CONSEQUENCE-LEDGER.adoc" +expect = "contains:confirmation rate" +verifier = "git-diff" + +[[claim]] +id = "C5" +claim_class = "file-changed" +statement = "the reference verifier can emit an unverifiable verdict" +target = "scripts/verify-claims.sh" +expect = "contains:unverifiable" +verifier = "git-diff" + +[[claim]] +id = "C6" +claim_class = "command-ran" +statement = "the DYADT conformance vector suite passes" +target = "bash did-you-actually-do-that/spec/conformance/run-conformance.sh" +expect = "exit==0" +verifier = "command-transcript" + +[[claim]] +id = "C7" +claim_class = "test-passed" +statement = "the verifier refutes a false claim 
and confirms a true one (self-check)" +target = "bash scripts/tests/wave4-dyadt-test.sh" +expect = "exit==0" +verifier = "command-transcript" diff --git a/COMPLIANCE-DASHBOARD.md b/COMPLIANCE-DASHBOARD.md index 8ee5060d..42d15808 100644 --- a/COMPLIANCE-DASHBOARD.md +++ b/COMPLIANCE-DASHBOARD.md @@ -42,6 +42,7 @@ | `toolchain-readiness-grades` | ❌ gap | 1/5 | 2/4 | 0/3 | 83% | 2026-07-03 | | `rhodium-standard-repositories` | ❌ gap | 2/3 | 1/2 | 0/1 | 50% | 2026-07-03 | | `session-management-standards` | ❌ gap | 1/5 | 1/4 | 0/3 | 41% | 2026-07-03 | +| `did-you-actually-do-that` | ✅ met | 5/5 | 2/3 | 0/2 | 90% | 2026-07-03 | | `ensaid-config` | ❌ gap | 0/5 | 0/3 | 0/3 | 90% | 2026-07-03 | | `accessibility` | ❌ gap | 2/5 | 0/5 | 0/3 | 100% | 2026-07-03 | | `publication-pre-flight` | ❌ gap | 0/5 | 0/4 | 0/2 | 36% | 2026-07-03 | @@ -51,10 +52,10 @@ ## Estate rollup -- **Specs registered (local):** 28 -- **Specs with a scorecard:** 28 / 28 -- **MUST requirements:** 41 passing / 138 total (74 failing) -- **Estate systems coverage:** 66% of 328 graded requirements have a mechanical check +- **Specs registered (local):** 29 +- **Specs with a scorecard:** 29 / 29 +- **MUST requirements:** 46 passing / 143 total (74 failing) +- **Estate systems coverage:** 67% of 338 graded requirements have a mechanical check ## How this dashboard stays honest diff --git a/Justfile b/Justfile index 6dd0734b..8da52930 100644 --- a/Justfile +++ b/Justfile @@ -73,6 +73,18 @@ scorecards-check: scorecards-check-strict: @bash scripts/build-scorecards.sh --check --strict +# DYADT: verify a CLAIMS.a2ml against primary evidence (default: root CLAIMS.a2ml) +verify-claims path="CLAIMS.a2ml": + @bash scripts/verify-claims.sh "{{path}}" + +# DYADT: run the conformance vector suite +dyadt-conformance: + @bash did-you-actually-do-that/spec/conformance/run-conformance.sh + +# DYADT regression test (confirm/refute/unverifiable + guards) +dyadt-test: + @bash scripts/tests/wave4-dyadt-test.sh + # Aggregate 
compliance gate: registry drift is the HARD gate (registry-check, # a hard dep). The RSR self-audit is INFORMATIONAL — a monorepo is not expected # to score Gold — but a *broken* audit (exit 4 / unexpected) must fail loudly diff --git a/TOPOLOGY.md b/TOPOLOGY.md index 5a2b4f2b..2fdf0382 100644 --- a/TOPOLOGY.md +++ b/TOPOLOGY.md @@ -9,7 +9,7 @@ > It cannot freeze: every regeneration re-reads ground truth. Do not edit by hand. - **Phase:** active  |  **Maturity:** experimental  |  **STATE last-updated:** 2026-06-03T00:00:00Z -- **Registry entries:** 31 specs across 6 streams +- **Registry entries:** 32 specs across 6 streams - **Front door:** human → [README.adoc](README.adoc); machine → [0-AI-MANIFEST.a2ml](0-AI-MANIFEST.a2ml) - **Registry:** [.machine_readable/REGISTRY.a2ml](.machine_readable/REGISTRY.a2ml) (index + source hashes) · prose: [REGISTRY.adoc](REGISTRY.adoc) @@ -55,6 +55,7 @@ |---|---|---| | RSR — Rhodium Standard Repositories | [`rhodium-standard-repositories/`](rhodium-standard-repositories/) | the repository-compliance standard every repo is graded against | | Session Management Standards | [`session-management-standards/`](session-management-standards/) | continuity / verify / handover protocols | +| DYADT — Did-You-Actually-Do-That | [`did-you-actually-do-that/`](did-you-actually-do-that/) | post-action agent-claim verification (Tier 4 accountability) | | ENSAID Config | [`ensaid-config/`](ensaid-config/) | the ensaid configuration standard | | Accessibility Standard | [`accessibility/`](accessibility/) | estate accessibility requirements | | Publication Pre-Flight | [`publication-pre-flight/`](publication-pre-flight/) | submission gate (HOL + Zenodo checklists) | diff --git a/did-you-actually-do-that/README.adoc b/did-you-actually-do-that/README.adoc new file mode 100644 index 00000000..372cd35d --- /dev/null +++ b/did-you-actually-do-that/README.adoc @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: CC-BY-SA-4.0 +// SPDX-FileCopyrightText: 2026 
Jonathan D.A. Jewell += Did-You-Actually-Do-That (DYADT) — Post-Action Claim Verification +:toc: preamble +:icons: font + +[.lead] +DYADT is the estate standard for *checking, mechanically, that an agent's +claimed outcomes actually happened*. It is the missing **Tier 4** of the +agent-accountability pipeline: the estate already gates what an agent may do +*before* it acts; DYADT gates what an agent *says it did* after. + +== The problem it closes + +An LLM agent will report "I fixed the bug", "tests pass", "the PR is merged", +"I closed the issue" — and be wrong, or lying, or accidentally green. Every +prior soundness hole in this estate's own tooling (vacuous validators, `|| true` +masks, stub steps that print success) is a special case of one disease: *a claim +was trusted instead of verified*. The doctrine already names the cure — +_ground-truth by running the tool, always fail loudly, report faithfully_ — but +until now there was **no mechanical stage that takes an agent's asserted outcome +and confirms it against reality**. + +DYADT is that stage. It is deliberately dumb and adversarial: it does not trust +the agent's narration, it re-derives the outcome from primary evidence (the git +tree, a real CI run, the actual issue state) and returns one of three verdicts — +`confirmed`, `refuted`, or `unverifiable` — where *unverifiable is loud, never +green*. + +== Where it sits: the four-tier accountability pipeline + +[cols="1,3,2", options="header"] +|=== +| Tier | What it governs | Home + +| 1. Admission +| An agent MUST read the manifest/context before acting. +| `0-ai-gatekeeper-protocol/` + +| 2. Pre-action gating +| Entropy budgets, intent classification, confirmation for risky ops — *before* an op runs. +| `agentic-a2ml/` (`AGENTIC.a2ml`) + +| 3. In-session invariant gates +| Contractile MUST/TRUST/BUST/DUST/ADJUST/INTEND — block at session-close / pre-push / pre-merge. +| `contractiles/` + +| **4. 
Post-action verification (this spec)** +| **Take the agent's claimed outcomes and mechanically confirm each against primary evidence; record the verdicts in an append-only consequence ledger.** +| **`did-you-actually-do-that/`** +|=== + +Tiers 1–3 answer "should this happen / may this happen". Tier 4 answers the +question none of them do: **"it says it happened — did it?"** + +== The spec set + +[cols="1,3", options="header"] +|=== +| Document | What it defines + +| link:spec/CLAIM-FORMAT.adoc[`spec/CLAIM-FORMAT.adoc`] +| The typed claim: `claim_class`, asserted evidence, and the verifier binding. Worked example: link:../CLAIMS.a2ml[the repo-root `CLAIMS.a2ml`]. + +| link:spec/VERIFICATION-PROTOCOL.adoc[`spec/VERIFICATION-PROTOCOL.adoc`] +| The mechanical verifier taxonomy and the `confirmed` / `refuted` / `unverifiable` verdict rules. + +| link:spec/CONSEQUENCE-LEDGER.adoc[`spec/CONSEQUENCE-LEDGER.adoc`] +| The append-only, dual-signed record of verdicts and the per-actor confirmation rate that Tier 3 MAY gate on. + +| link:spec/conformance/[`spec/conformance/`] +| Executable vectors (claim + fixture + expected verdict) a production verifier is TDD'd against. + +| link:docs/NAMING-RESOLUTION.adoc[`docs/NAMING-RESOLUTION.adoc`] +| Resolves the "PLASMA" naming collision (claim-grounder vs. licence tooling). +|=== + +== Reference implementation vs. this spec + +This repository is the **declaration layer**. It ships: + +* the normative spec set above, +* a *reference* verifier, `scripts/verify-claims.sh` (in the standards repo + root), that implements enough of the protocol to check a `CLAIMS.a2ml` file in + CI, and +* a dogfood `CLAIMS.a2ml` that this very change verifies about itself. + +The *production* actuator — continuous, in-session, wired into +hypatia/gitbot-fleet with real consequence enforcement — is built in the +`hyperpolymath/did-you-actually-do-that` repository against the conformance +vectors here.
That work is chartered as a campaign issue; it MUST NOT diverge +from this contract. + +== Dogfooded from birth + +The commit that introduces DYADT ships a filled `CLAIMS.a2ml` describing what it +claims to have done, and CI runs `verify-claims.sh` over it. The spec's first +conformance run is on itself — if a claim here is false, the check is `refuted` +and the build fails loudly. That is the whole point. diff --git a/did-you-actually-do-that/docs/NAMING-RESOLUTION.adoc b/did-you-actually-do-that/docs/NAMING-RESOLUTION.adoc new file mode 100644 index 00000000..a30e1ee1 --- /dev/null +++ b/did-you-actually-do-that/docs/NAMING-RESOLUTION.adoc @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: CC-BY-SA-4.0 +// SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell += DYADT / PLASMA Naming Resolution +:icons: font + +== The collision + +"PLASMA" has been used in the estate for two unrelated things: + +. *palimpsest-plasma* — licence tooling / exactness engine for the Palimpsest + licence family (the "distrust the neural for exactness" authority for + licences, invariants, equivalence). +. *A claim-grounder* — the idea, referenced in several places, of an engine that + checks whether an agent's asserted outcomes are true. + +Using one name for both is a live source of confusion: an audit that says +"grounded by PLASMA" is ambiguous about which system, and it entangles the +licence-exactness authority with agent-claim verification, which are different +concerns with different trust models. + +== The resolution + +[cols="1,3", options="header"] +|=== +| Name | Sole meaning going forward + +| *PLASMA* / *palimpsest-plasma* +| Licence & exactness tooling only. It remains the authority for licence + identity, invariants, and equivalence ("distrust the neural for exactness"). + It is NOT an agent-claim verifier. + +| *DYADT* (`did-you-actually-do-that`) +| Post-action agent-claim verification. The claim-grounder concept is subsumed + here and drops the PLASMA name entirely.
+|=== + +Concretely: + +* The "claim-grounder" concept is **renamed to DYADT verification** wherever it + meant "check the agent's asserted outcomes". Documents SHOULD say "DYADT" or + "the DYADT verifier", never "PLASMA", for that concept. +* `palimpsest-plasma` keeps PLASMA exclusively for licence/exactness work. +* Where a pipeline genuinely needs *both* (e.g. a licence claim that must be + checked), DYADT returns `unverifiable` with reason `manual-only` and defers to + PLASMA/human governance — it does not attempt to ground licence exactness + itself. The boundary is explicit. + +== Recorded in canon + +This resolution is recorded in `CANONICAL-NAMES.adoc` at the standards +repo root (the canonical registry of estate naming decisions) so that future +tooling and agents inherit the disambiguation. This document holds the +detailed rationale for the DYADT/PLASMA split. diff --git a/did-you-actually-do-that/spec/CLAIM-FORMAT.adoc b/did-you-actually-do-that/spec/CLAIM-FORMAT.adoc new file mode 100644 index 00000000..0bdebded --- /dev/null +++ b/did-you-actually-do-that/spec/CLAIM-FORMAT.adoc @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: CC-BY-SA-4.0 +// SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell += DYADT Claim Format +:toc: preamble +:icons: font + +[.lead] +A *claim* is a typed, machine-checkable assertion that an agent did a specific +thing. Claims are emitted as A2ML (`CLAIMS.a2ml`) and consumed by a verifier +that re-derives the outcome from primary evidence. This document is normative; +keywords MUST / SHOULD / MAY are RFC-2119. + +== File shape + +A `CLAIMS.a2ml` file has one `[claims]` header and one `[[claim]]` block per +assertion. It SHOULD live at the root of the work product it describes (a repo, +a PR worktree, a session directory).
+ +[source,toml] +---- +[claims] +schema = "dyadt/claim@1" +session = "session_0114ps6mY5jAH4Sz" # opaque session id +actor = "claude-opus-4-8" # the agent making the claims +emitted_commit = "HEAD" # git ref the claims are about (optional) + +[[claim]] +id = "C1" +claim_class = "file-changed" +statement = "check-6scm.sh no longer passes vacuously" +target = "a2ml/scripts/check-6scm.sh" +expect = "modified" +verifier = "git-diff" + +[[claim]] +id = "C2" +claim_class = "command-ran" +statement = "the Wave-0 regression suite passes" +target = "bash scripts/tests/wave0-false-green-test.sh" +expect = "exit==0" +verifier = "command-transcript" +---- + +== Required fields per claim + +[cols="1,1,3", options="header"] +|=== +| Field | Required | Meaning + +| `id` | MUST | Stable id unique within the file (`C1`, `C2`, …). +| `claim_class` | MUST | One of the claim classes below. Determines which verifier applies. +| `statement` | MUST | Human-readable assertion (what the agent says it did). Never the sole basis for a verdict. +| `target` | MUST | The concrete object the claim is about: a path, a command line, an issue/PR ref, a CI job — interpreted per `claim_class`. +| `expect` | MUST | The machine-checkable expected condition (see per-class table). +| `verifier` | MUST | The verifier that discharges this claim (see VERIFICATION-PROTOCOL). MUST be compatible with `claim_class`. +| `evidence` | SHOULD | The agent's cited proof (a diff stat, a log excerpt, a URL). Advisory input to the verifier, never trusted as the verdict. +| `not_before` | MAY | An ISO timestamp; the verifier MUST treat evidence older than this as `unverifiable` (stale-evidence guard). +|=== + +== Claim classes + +Each claim class fixes the *kind* of reality being asserted and the shape of +`target` / `expect`. + +[cols="1,2,2,2", options="header"] +|=== +| `claim_class` | Asserts | `target` | `expect` examples + +| `file-changed` +| A file was created / modified / deleted. 
+| repo-relative path +| `created` \| `modified` \| `deleted` \| `contains:<text>` \| `sha256:<hex>` + +| `test-passed` +| A named test / suite passed. +| test command or suite id +| `exit==0` \| `passed>=N` + +| `ci-green` +| A CI run/job concluded successfully. +| workflow/job name or run id +| `conclusion==success` + +| `issue-closed` +| An issue reached a state. +| `owner/repo#N` +| `state==closed` \| `state==open` + +| `pr-merged` +| A PR reached a state. +| `owner/repo#N` +| `merged==true` \| `state==open` + +| `command-ran` +| A command was run and produced an outcome. +| the exact command line +| `exit==0` \| `stdout-contains:<text>` + +| `claim-of-claims` +| A referenced `CLAIMS.a2ml` was itself all-`confirmed` (composition). +| path to another CLAIMS.a2ml +| `all-confirmed` +|=== + +A verifier MUST reject (`unverifiable`) a claim whose `verifier` is not listed +as compatible with its `claim_class` in VERIFICATION-PROTOCOL — an +agent MUST NOT be able to choose a weaker verifier than the claim class warrants. + +== Honesty constraints (normative) + +. A claim's verdict MUST be derived from primary evidence re-collected by the + verifier, never from the `statement` or `evidence` fields the agent wrote. +. The absence of a required field, an unknown `claim_class`, or an + incompatible `verifier` MUST yield `unverifiable`, which is a *loud* outcome + (it does not pass). +. A claim MUST NOT be silently dropped. Every `[[claim]]` in the file MUST + appear in the verifier's output with exactly one verdict. +. Licence/SPDX claims MUST NOT be auto-`confirmed`: they are `manual-only` + (the estate Manual-Only licence policy). A verifier encountering a + licence-touching claim MUST return `unverifiable` with reason `manual-only`. + +== Relationship to AGENTIC.a2ml + +`AGENTIC.a2ml` (Tier 2) records *intended* actions and their permission before +execution. `CLAIMS.a2ml` (Tier 4) records *asserted* outcomes after execution.
+A session MAY link the two by `id` so the ledger can show intent → claim → +verdict as one chain. DYADT does not require AGENTIC to be present. diff --git a/did-you-actually-do-that/spec/CONSEQUENCE-LEDGER.adoc b/did-you-actually-do-that/spec/CONSEQUENCE-LEDGER.adoc new file mode 100644 index 00000000..2b1cfd01 --- /dev/null +++ b/did-you-actually-do-that/spec/CONSEQUENCE-LEDGER.adoc @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: CC-BY-SA-4.0 +// SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell += DYADT Consequence Ledger +:toc: preamble +:icons: font + +[.lead] +Verification without memory is theatre. The consequence ledger is the +append-only record of every verification run and its verdicts, keyed by actor, +so that a repeatedly-over-claiming agent accrues a falling confirmation rate that +downstream gates can act on. This document is normative. + +== What it is + +An *append-only*, dual-signed log. Each entry records one verification run: +which claims, which verdicts, over which work product, by which actor, attested +by both the actor and the verifier. Entries are never edited or deleted — a +correction is a new entry that references the prior one. + +Home: `.machine_readable/ledger/` in the verified repo (or a central estate +ledger the production verifier writes to). One append-only file per period +(e.g. `ledger-2026-07.a2ml`) keeps entries small and diff-friendly. + +== Entry format + +[source,toml] +---- +[[entry]] +ts = "2026-07-03T01:30:00Z" # when the verification ran +session = "session_0114ps6mY5jAH4Sz" +actor = "claude-opus-4-8" # the agent whose claims were checked +work = "did-you-actually-do-that (Wave 4)" +claims_file = "CLAIMS.a2ml" +claims_sha256 = "sha256:…" # hash of the exact claims verified +confirmed = 6 +refuted = 0 +unverifiable = 1 +verifier = "scripts/verify-claims.sh@" +# Dual signature: BOTH the actor and the verifier attest. Reuses the contractile +# pledge-signing mechanism (contractiles/ TRUST/INTEND). 
A missing signature +# makes the entry advisory-only (it is recorded, but MUST NOT be used to gate). +actor_sig = "ssh-ed25519 …" +verifier_sig = "ssh-ed25519 …" +prior = "" # id/hash of a superseded entry, if a correction +---- + +== The confirmation rate + +For an actor, over a window, the *confirmation rate* is: + + confirmed / (confirmed + refuted) + +`unverifiable` outcomes are excluded from the denominator (they are not evidence +of either honesty or dishonesty) but MUST be reported alongside, because a spike +in `unverifiable` is itself a signal (an agent emitting unverifiable claims to +dodge scrutiny). + +A conforming ledger reader MUST be able to compute, per actor: total claims, +confirmation rate, refutation count, and unverifiable count over a time window. + +== How Tier 3 MAY gate on it (composition) + +The contractile layer (Tier 3) MAY read the ledger at `pre_merge` and block when +an actor's recent confirmation rate falls below a repo-declared floor, or when +the current session produced any `refuted` claim. This is expressed as an +ordinary contractile check: + +[source] +---- +### dyadt-confirmation-floor +- description: the acting agent's DYADT confirmation rate MUST be >= the repo floor +- run: bash scripts/ledger-gate.sh --actor "$AGENT" --floor 0.95 --window 30d +- severity: high +---- + +The floor, window, and severity are the repo's choice. DYADT defines the ledger +and the rate; it does not mandate a specific threshold — that is governance. + +== Non-goals (normative) + +. The ledger is *not* a reputation score to shame agents. It is an evidence + store. Any downstream use MUST cite specific entries, never a bare number. +. The ledger MUST NOT record secrets, tokens, or full command output — only the + claim identities, verdicts, hashes, and signatures. Evidence bodies live in + the run logs the entry references, not in the ledger. +. 
Licence/SPDX verdicts are always `unverifiable` (`manual-only`) and therefore + never move a confirmation rate — the Manual-Only licence policy is preserved + end to end. + +== Retention + +Ledger files are append-only and retained indefinitely by default (they are the +audit trail). A repo MAY define a retention/rotation policy, but MUST NOT rewrite +history — rotation means starting a new period file, never editing an old one. diff --git a/did-you-actually-do-that/spec/VERIFICATION-PROTOCOL.adoc b/did-you-actually-do-that/spec/VERIFICATION-PROTOCOL.adoc new file mode 100644 index 00000000..d25e8d33 --- /dev/null +++ b/did-you-actually-do-that/spec/VERIFICATION-PROTOCOL.adoc @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: CC-BY-SA-4.0 +// SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell += DYADT Verification Protocol +:toc: preamble +:icons: font + +[.lead] +How a claim becomes a verdict. This document is normative. The governing +principle is _ground-truth by running the tool_: a verifier re-collects primary +evidence and MUST NOT derive a verdict from the agent's `statement` or cited +`evidence`. + +== Verdicts + +[cols="1,3", options="header"] +|=== +| Verdict | Meaning + +| `confirmed` +| Primary evidence, re-collected now, satisfies `expect`. + +| `refuted` +| Primary evidence was collected and *contradicts* the claim (the file is + unchanged, the command exited non-zero, the PR is still open). + +| `unverifiable` +| The verifier could not collect trustworthy primary evidence: a required field + is missing, the `verifier` is incompatible with the `claim_class`, the + evidence is stale (`not_before`), a network verifier had no credentials, or + the claim is licence-touching (`manual-only`). **`unverifiable` is a loud, + non-passing outcome.** +|=== + +There is no fourth verdict and no "assumed pass". Silence is not success. 
+ +== The verifier taxonomy + +[cols="1,2,2", options="header"] +|=== +| Verifier id | Discharges `claim_class` | Primary evidence it collects + +| `git-diff` +| `file-changed` +| `git` status of the target path vs. a base ref (created/modified/deleted), + or the working-tree file's content (`contains:`/`sha256:`). + +| `command-transcript` +| `command-ran`, `test-passed` +| Runs the target command in a clean shell; judges by exit code / stdout. + +| `ci-run` +| `ci-green` +| The CI provider's API — the run/job's real `conclusion`. (Network.) + +| `issue-state` +| `issue-closed` +| The forge API — the issue's real `state`. (Network.) + +| `pr-state` +| `pr-merged` +| The forge API — the PR's real `merged`/`state`. (Network.) + +| `claims-compose` +| `claim-of-claims` +| Recursively verifies a referenced `CLAIMS.a2ml`; `confirmed` iff all-confirmed. + +| `manual` +| any +| Nothing automatable — returns `unverifiable` with reason `manual-only`. Used + for licence/SPDX and other governance-only claims. +|=== + +A verifier MUST refuse (`unverifiable`, reason `incompatible-verifier`) any +claim whose `verifier` is not paired with its `claim_class` above. This closes +the "pick a weaker check" hole — an agent cannot claim `pr-merged` and have it +discharged by `command-transcript`. + +== Adversarial stance (normative) + +. *Re-derive, never read back.* The verifier MUST collect evidence itself. Any + implementation that parses the agent's `evidence` field and returns + `confirmed` on that basis is non-conforming. +. *Default-refute on ambiguity for refutable classes.* If primary evidence can + be collected but does not clearly satisfy `expect`, the verdict is `refuted`, + not `unverifiable`. `unverifiable` is reserved for "could not collect + trustworthy evidence at all". +. *Fail loud, fail closed.* A conforming CI integration MUST fail the build when + any claim is `refuted`, and (unless explicitly opted out per run) when any + claim is `unverifiable`. +. 
*No network fabrication.* A network verifier without credentials MUST return + `unverifiable` (reason `needs-network-verifier`) — it MUST NOT guess, and MUST + NOT fall back to reading the agent's cited evidence. +. *Stale evidence is unverifiable.* If `not_before` is set and the primary + evidence predates it, the verdict is `unverifiable` (reason `stale-evidence`). + +== Multi-verifier corroboration (SHOULD) + +For a high-stakes `claim_class` (e.g. `pr-merged`, `ci-green`), a production +verifier SHOULD collect from more than one independent source where available +(e.g. the forge API *and* the local git state of the merge commit) and downgrade +to `refuted`/`unverifiable` on disagreement. The reference verifier implements +single-source local verifiers; corroboration is a production concern tracked in +the conformance vectors. + +== Output contract + +For each `[[claim]]` in the input, a verifier MUST emit exactly one line +associating `id` → verdict → reason, and a summary of counts. Every claim in the +file MUST appear (no silent drops). The reference implementation +(`scripts/verify-claims.sh`) is the canonical output shape; production verifiers +MAY emit richer machine formats (JSON, SARIF) but MUST preserve one-verdict-per-claim +and the loud-exit semantics. diff --git a/did-you-actually-do-that/spec/conformance/README.adoc b/did-you-actually-do-that/spec/conformance/README.adoc new file mode 100644 index 00000000..b405b969 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/README.adoc @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: CC-BY-SA-4.0 += DYADT Conformance Vectors +:icons: font + +Each vector is a pair: + +* `<name>.a2ml` — a `CLAIMS.a2ml` input, and +* `<name>.expected` — one `<id> <verdict>` line per claim, the verdict a + conforming verifier MUST produce. + +`run-conformance.sh` runs the reference verifier over every vector and diffs the +produced verdicts against `<name>.expected`.
A production verifier (in +`hyperpolymath/did-you-actually-do-that`) MUST pass this same suite — it is the +shared ground truth, exactly as the a2ml spec uses `tests/vectors/`. + +The vectors deliberately use base-independent conditions (fixed commands, +fixture-file contents, verifier/class mismatches, licence claims) so the +expected verdicts are stable regardless of git history. + +Run: `bash did-you-actually-do-that/spec/conformance/run-conformance.sh` diff --git a/did-you-actually-do-that/spec/conformance/confirmed-command.a2ml b/did-you-actually-do-that/spec/conformance/confirmed-command.a2ml new file mode 100644 index 00000000..0da1b4fa --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/confirmed-command.a2ml @@ -0,0 +1,10 @@ +[claims] +schema = "dyadt/claim@1" +actor = "conformance" +[[claim]] +id = "C1" +claim_class = "command-ran" +statement = "true succeeds" +target = "true" +expect = "exit==0" +verifier = "command-transcript" diff --git a/did-you-actually-do-that/spec/conformance/confirmed-command.expected b/did-you-actually-do-that/spec/conformance/confirmed-command.expected new file mode 100644 index 00000000..bcf5b013 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/confirmed-command.expected @@ -0,0 +1 @@ +C1 confirmed diff --git a/did-you-actually-do-that/spec/conformance/contains-fixture.a2ml b/did-you-actually-do-that/spec/conformance/contains-fixture.a2ml new file mode 100644 index 00000000..c036f451 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/contains-fixture.a2ml @@ -0,0 +1,10 @@ +[claims] +schema = "dyadt/claim@1" +actor = "conformance" +[[claim]] +id = "C1" +claim_class = "file-changed" +statement = "the fixture contains the marker" +target = "did-you-actually-do-that/spec/conformance/fixture.txt" +expect = "contains:DYADT fixture" +verifier = "git-diff" diff --git a/did-you-actually-do-that/spec/conformance/contains-fixture.expected 
b/did-you-actually-do-that/spec/conformance/contains-fixture.expected new file mode 100644 index 00000000..bcf5b013 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/contains-fixture.expected @@ -0,0 +1 @@ +C1 confirmed diff --git a/did-you-actually-do-that/spec/conformance/fixture.txt b/did-you-actually-do-that/spec/conformance/fixture.txt new file mode 100644 index 00000000..d255380b --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/fixture.txt @@ -0,0 +1 @@ +hello DYADT fixture diff --git a/did-you-actually-do-that/spec/conformance/incompatible-verifier.a2ml b/did-you-actually-do-that/spec/conformance/incompatible-verifier.a2ml new file mode 100644 index 00000000..cf754dbe --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/incompatible-verifier.a2ml @@ -0,0 +1,10 @@ +[claims] +schema = "dyadt/claim@1" +actor = "conformance" +[[claim]] +id = "C1" +claim_class = "pr-merged" +statement = "PR merged, but checked with the wrong verifier" +target = "hyperpolymath/standards#454" +expect = "merged==true" +verifier = "command-transcript" diff --git a/did-you-actually-do-that/spec/conformance/incompatible-verifier.expected b/did-you-actually-do-that/spec/conformance/incompatible-verifier.expected new file mode 100644 index 00000000..450c8c22 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/incompatible-verifier.expected @@ -0,0 +1 @@ +C1 unverifiable diff --git a/did-you-actually-do-that/spec/conformance/manual-licence.a2ml b/did-you-actually-do-that/spec/conformance/manual-licence.a2ml new file mode 100644 index 00000000..d70b7099 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/manual-licence.a2ml @@ -0,0 +1,10 @@ +[claims] +schema = "dyadt/claim@1" +actor = "conformance" +[[claim]] +id = "C1" +claim_class = "file-changed" +statement = "SPDX licence header added" +target = "some/file.rs" +expect = "contains:SPDX-License-Identifier" +verifier = "git-diff" diff --git 
a/did-you-actually-do-that/spec/conformance/manual-licence.expected b/did-you-actually-do-that/spec/conformance/manual-licence.expected new file mode 100644 index 00000000..450c8c22 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/manual-licence.expected @@ -0,0 +1 @@ +C1 unverifiable diff --git a/did-you-actually-do-that/spec/conformance/refuted-command.a2ml b/did-you-actually-do-that/spec/conformance/refuted-command.a2ml new file mode 100644 index 00000000..afded3d7 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/refuted-command.a2ml @@ -0,0 +1,10 @@ +[claims] +schema = "dyadt/claim@1" +actor = "conformance" +[[claim]] +id = "C1" +claim_class = "command-ran" +statement = "false is claimed to succeed" +target = "false" +expect = "exit==0" +verifier = "command-transcript" diff --git a/did-you-actually-do-that/spec/conformance/refuted-command.expected b/did-you-actually-do-that/spec/conformance/refuted-command.expected new file mode 100644 index 00000000..9ac81b34 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/refuted-command.expected @@ -0,0 +1 @@ +C1 refuted diff --git a/did-you-actually-do-that/spec/conformance/run-conformance.sh b/did-you-actually-do-that/spec/conformance/run-conformance.sh new file mode 100755 index 00000000..d0c1a218 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/run-conformance.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MPL-2.0 +set -uo pipefail +# +# run-conformance.sh — run the reference DYADT verifier over every conformance +# vector and diff the produced verdicts against the .expected files. +# +# A conforming verifier (reference or production) MUST pass this suite. 
+ +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT="$(git -C "$HERE" rev-parse --show-toplevel)" +VERIFIER="${DYADT_VERIFIER:-$ROOT/scripts/verify-claims.sh}" + +pass=0 fail=0 +for vec in "$HERE"/*.a2ml; do + name="$(basename "$vec" .a2ml)" + exp="$HERE/$name.expected" + [ -f "$exp" ] || { echo " ❌ $name: no .expected file"; fail=$((fail+1)); continue; } + # Run with unverifiable allowed so the verifier reports every verdict without + # early-exiting; we assert on the per-claim verdicts, not the process code. + got="$(cd "$ROOT" && DYADT_ALLOW_UNVERIFIABLE=1 bash "$VERIFIER" "$vec" 2>/dev/null \ + | grep -oE '[A-Z][0-9]+ (confirmed|REFUTED|unverifiable)' \ + | awk '{print $1, tolower($2)}' | sort)" + want="$(sort "$exp")" + if [ "$got" = "$want" ]; then + echo " ✅ $name"; pass=$((pass+1)) + else + echo " ❌ $name"; echo " want: $(echo "$want" | tr '\n' ';')"; echo " got: $(echo "$got" | tr '\n' ';')"; fail=$((fail+1)) + fi +done + +echo "" +echo "DYADT conformance: $pass passed, $fail failed" +[ "$fail" -eq 0 ] diff --git a/did-you-actually-do-that/spec/conformance/unverifiable-network.a2ml b/did-you-actually-do-that/spec/conformance/unverifiable-network.a2ml new file mode 100644 index 00000000..1bcc6ac9 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/unverifiable-network.a2ml @@ -0,0 +1,10 @@ +[claims] +schema = "dyadt/claim@1" +actor = "conformance" +[[claim]] +id = "C1" +claim_class = "ci-green" +statement = "a CI job passed" +target = "governance" +expect = "conclusion==success" +verifier = "ci-run" diff --git a/did-you-actually-do-that/spec/conformance/unverifiable-network.expected b/did-you-actually-do-that/spec/conformance/unverifiable-network.expected new file mode 100644 index 00000000..450c8c22 --- /dev/null +++ b/did-you-actually-do-that/spec/conformance/unverifiable-network.expected @@ -0,0 +1 @@ +C1 unverifiable diff --git a/scripts/build-registry.sh b/scripts/build-registry.sh index ee081700..787b6532 100755 --- 
a/scripts/build-registry.sh +++ b/scripts/build-registry.sh @@ -81,6 +81,7 @@ component-readiness-grades|readiness|component-readiness-grades/|CRG — Compone toolchain-readiness-grades|readiness|toolchain-readiness-grades/|TRG — Toolchain Readiness Grades|per-toolchain readiness profile templates rhodium-standard-repositories|governance|rhodium-standard-repositories/|RSR — Rhodium Standard Repositories|the repository-compliance standard every repo is graded against session-management-standards|governance|session-management-standards/|Session Management Standards|continuity / verify / handover protocols +did-you-actually-do-that|governance|did-you-actually-do-that/|DYADT — Did-You-Actually-Do-That|post-action agent-claim verification (Tier 4 accountability) ensaid-config|governance|ensaid-config/|ENSAID Config|the ensaid configuration standard accessibility|governance|accessibility/|Accessibility Standard|estate accessibility requirements publication-pre-flight|governance|publication-pre-flight/|Publication Pre-Flight|submission gate (HOL + Zenodo checklists) diff --git a/scripts/tests/wave4-dyadt-test.sh b/scripts/tests/wave4-dyadt-test.sh new file mode 100755 index 00000000..048b687f --- /dev/null +++ b/scripts/tests/wave4-dyadt-test.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MPL-2.0 +set -uo pipefail +# +# Wave-4 DYADT regression test. +# +# The whole point of DYADT is that it can REFUTE a false claim and does not +# confirm on the agent's say-so. This exercises confirm / refute / unverifiable +# and the incompatible-verifier + manual-only guards, plus the conformance suite. + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +V="$ROOT/scripts/verify-claims.sh" +TMP="$(mktemp -d)"; trap 'rm -rf "$TMP"' EXIT + +pass=0 fail=0 +ok() { echo " ✅ $1"; pass=$((pass + 1)); } +bad() { echo " ❌ $1"; fail=$((fail + 1)); } + +# verdict_of -> prints confirmed|refuted|unverifiable +verdict_of() { + cd "$ROOT" && DYADT_ALLOW_UNVERIFIABLE=1 bash "$V" "$1" 2>/dev/null \ + | grep -oE "$2 (confirmed|REFUTED|unverifiable)" | awk '{print tolower($2)}' | head -1 +} + +echo "== confirm / refute (does not trust the statement) ==" +cat > "$TMP/t.a2ml" <<'EOF' +[claims] +schema = "dyadt/claim@1" +actor = "test" +[[claim]] +id = "C1" +claim_class = "command-ran" +statement = "this command succeeds" +target = "true" +expect = "exit==0" +verifier = "command-transcript" +[[claim]] +id = "C2" +claim_class = "command-ran" +statement = "LIE: this command succeeds (it does not)" +target = "false" +expect = "exit==0" +verifier = "command-transcript" +EOF +[ "$(verdict_of "$TMP/t.a2ml" C1)" = confirmed ] && ok "true command confirmed" || bad "true command not confirmed" +[ "$(verdict_of "$TMP/t.a2ml" C2)" = refuted ] && ok "false command REFUTED despite honest-sounding statement" || bad "false command not refuted" + +echo "== guards ==" +# incompatible verifier -> unverifiable +cat > "$TMP/i.a2ml" <<'EOF' +[claims] +schema = "dyadt/claim@1" +actor = "test" +[[claim]] +id = "C1" +claim_class = "pr-merged" +statement = "PR merged" +target = "o/r#1" +expect = "merged==true" +verifier = "command-transcript" +EOF +[ "$(verdict_of "$TMP/i.a2ml" C1)" = unverifiable ] && ok "incompatible verifier -> unverifiable" || bad "incompatible verifier not caught" + +# licence claim -> unverifiable (manual-only) +cat > "$TMP/l.a2ml" <<'EOF' +[claims] +schema = "dyadt/claim@1" +actor = "test" +[[claim]] +id = "C1" +claim_class = "file-changed" +statement = "SPDX licence header added" +target = "x.rs" +expect = "contains:SPDX-License-Identifier" +verifier = "git-diff" +EOF +[ "$(verdict_of "$TMP/l.a2ml" C1)" = unverifiable ] && ok 
"licence claim -> manual-only unverifiable" || bad "licence claim not manual-only" + +echo "== loud exit semantics ==" +# a refuted claim makes the process exit non-zero (fail loudly) +cat > "$TMP/r.a2ml" <<'EOF' +[claims] +schema = "dyadt/claim@1" +actor = "test" +[[claim]] +id = "C1" +claim_class = "command-ran" +statement = "false succeeds" +target = "false" +expect = "exit==0" +verifier = "command-transcript" +EOF +( cd "$ROOT" && bash "$V" "$TMP/r.a2ml" >/dev/null 2>&1 ); [ $? -ne 0 ] && ok "refuted claim fails loudly (non-zero exit)" || bad "refuted claim did not fail the run" + +# an all-confirmed file exits 0 +cat > "$TMP/g.a2ml" <<'EOF' +[claims] +schema = "dyadt/claim@1" +actor = "test" +[[claim]] +id = "C1" +claim_class = "command-ran" +statement = "true succeeds" +target = "true" +expect = "exit==0" +verifier = "command-transcript" +EOF +( cd "$ROOT" && bash "$V" "$TMP/g.a2ml" >/dev/null 2>&1 ); [ $? -eq 0 ] && ok "all-confirmed file exits 0" || bad "all-confirmed file did not exit 0" + +echo "== conformance suite ==" +bash "$ROOT/did-you-actually-do-that/spec/conformance/run-conformance.sh" >/dev/null 2>&1 && ok "conformance vectors pass" || bad "conformance vectors failed" + +echo +echo "Wave-4 DYADT regression: $pass passed, $fail failed" +[ "$fail" -eq 0 ] diff --git a/scripts/verify-claims.sh b/scripts/verify-claims.sh new file mode 100755 index 00000000..aac0e1fc --- /dev/null +++ b/scripts/verify-claims.sh @@ -0,0 +1,153 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MPL-2.0 +# SPDX-FileCopyrightText: 2026 Jonathan D.A. Jewell (hyperpolymath) +# +# verify-claims.sh — reference verifier for the DYADT claim format. +# See did-you-actually-do-that/spec/{CLAIM-FORMAT,VERIFICATION-PROTOCOL}.adoc. +# +# Re-derives each claim's outcome from PRIMARY EVIDENCE (git tree, real command +# runs) — never from the agent's own statement/evidence text — and emits one +# verdict per claim: confirmed | refuted | unverifiable. 
# `unverifiable` is LOUD: by default the run fails unless every claim is
# `confirmed`.
#
# This reference impl handles the LOCAL verifiers (git-diff, command-transcript,
# claims-compose). Network verifiers (ci-run, issue-state, pr-state) and the
# manual verifier return `unverifiable` with a reason — the production verifier
# in hyperpolymath/did-you-actually-do-that implements those against real APIs.
#
# Usage: verify-claims.sh [path/to/CLAIMS.a2ml]   (default: ./CLAIMS.a2ml)
#   env DYADT_BASE                  git ref claims are diffed against
#                                   (default: origin/main, then HEAD~1)
#   env DYADT_ALLOW_UNVERIFIABLE=1  treat unverifiable as non-fatal (still reported)
#   env DYADT_COMPOSE_MAX_DEPTH     nesting limit for claims-compose (default: 8)
# Exit: 0 all confirmed (or unverifiable allowed) · 1 a claim refuted/unverifiable · 2 usage/parse

set -uo pipefail

CLAIMS="${1:-CLAIMS.a2ml}"
[ -f "$CLAIMS" ] || { echo "error: claims file not found: $CLAIMS" >&2; exit 2; }

# Base ref for history-relative expectations. An explicit DYADT_BASE wins;
# otherwise prefer origin/main, then the parent commit. BASE stays empty when
# neither resolves (e.g. a shallow single-commit checkout).
BASE="${DYADT_BASE:-}"
if [ -z "$BASE" ]; then
  if git rev-parse --verify -q origin/main >/dev/null 2>&1; then
    BASE="origin/main"
  elif git rev-parse --verify -q HEAD~1 >/dev/null 2>&1; then
    BASE="HEAD~1"
  fi
fi

# --- primary-evidence verifiers ---------------------------------------------
# Every verifier prints exactly one line: "<verdict> <reason>", where verdict is
# confirmed | refuted | unverifiable.

# base_usable: succeeds only when BASE is set AND resolves to a commit.
# Without a real base there is no primary evidence for created/modified/deleted.
base_usable() {
  [ -n "$BASE" ] && git rev-parse --verify -q "${BASE}^{commit}" >/dev/null 2>&1
}

# git-diff: discharges file-changed claims.
# Arguments: $1 - repo-relative target path
#            $2 - expect: created | modified | deleted | contains:<regex> | sha256:<hex>
# Outputs:   "<verdict> <reason>" on stdout
v_git_diff() { # target expect
  local target="${1-}" expect="${2-}" existed_now=0 existed_base=0 rc

  # An empty path can never be evidence of anything.
  [ -n "$target" ] || { echo "unverifiable missing-target"; return; }

  # History-relative expectations need a resolvable base. Without one the old
  # behaviour confirmed `created` for ANY existing file and refuted
  # modified/deleted with no evidence — that is "could not collect evidence",
  # i.e. unverifiable.
  case "$expect" in
    created | modified | deleted)
      base_usable || { echo "unverifiable no-base-ref"; return; }
      ;;
  esac

  [ -e "$target" ] && existed_now=1
  if [ -n "$BASE" ] && git cat-file -e "$BASE:$target" 2>/dev/null; then
    existed_base=1
  fi

  case "$expect" in
    created)
      if [ "$existed_now" = 1 ] && [ "$existed_base" = 0 ]; then
        echo "confirmed created"
      else
        echo "refuted not-newly-created"
      fi
      ;;
    modified)
      if [ "$existed_now" = 1 ] && [ "$existed_base" = 1 ]; then
        # git diff --quiet: 0 = identical, 1 = differs, >1 = git error.
        # An error must not be read as "modified".
        git diff --quiet "$BASE" -- "$target" 2>/dev/null
        rc=$?
        case "$rc" in
          0) echo "refuted unchanged" ;;
          1) echo "confirmed modified" ;;
          *) echo "unverifiable git-diff-error" ;;
        esac
      else
        echo "refuted not-modified-pair"
      fi
      ;;
    deleted)
      if [ "$existed_now" = 0 ] && [ "$existed_base" = 1 ]; then
        echo "confirmed deleted"
      else
        echo "refuted not-deleted"
      fi
      ;;
    contains:*)
      local re="${expect#contains:}"
      # An empty regex matches every file — refuse rather than confirm vacuously.
      [ -n "$re" ] || { echo "unverifiable bad-expect"; return; }
      if [ "$existed_now" != 1 ]; then echo "refuted missing-pattern"; return; fi
      grep -Eq -- "$re" "$target" 2>/dev/null
      rc=$?
      case "$rc" in
        0) echo "confirmed contains" ;;
        1) echo "refuted missing-pattern" ;;
        *) echo "unverifiable grep-error" ;; # bad regex / unreadable target
      esac
      ;;
    sha256:*)
      local want="${expect#sha256:}" got=""
      if [ "$existed_now" != 1 ]; then echo "refuted absent"; return; fi
      # Hash via stdin so odd filenames cannot alter the tool's output format.
      if command -v sha256sum >/dev/null 2>&1; then
        got="$(sha256sum < "$target" 2>/dev/null | cut -d' ' -f1)"
      elif command -v shasum >/dev/null 2>&1; then
        got="$(shasum -a 256 < "$target" 2>/dev/null | cut -d' ' -f1)"
      fi
      if [ -z "$got" ]; then
        echo "unverifiable sha256-unavailable" # no tool / unreadable: not a mismatch
      elif [ "$got" = "$want" ]; then
        echo "confirmed sha256"
      else
        echo "refuted sha256-mismatch"
      fi
      ;;
    *) echo "unverifiable bad-expect" ;;
  esac
}

# command-transcript: run the command, judge by exit code / captured output.
# Arguments: $1 - command line (run via `bash -c`)
#            $2 - expect: exit==<N> | stdout-contains:<literal>
# Outputs:   "<verdict> <reason>" on stdout
v_command() { # target(command) expect
  local cmd="${1-}" expect="${2-}" out rc

  # `bash -c ""` exits 0, which would confirm a claim that names no command.
  [ -n "$cmd" ] || { echo "unverifiable missing-target"; return; }

  # Reject malformed expectations before running anything.
  case "$expect" in
    exit==*)
      case "${expect#exit==}" in
        '' | *[!0-9]*) echo "unverifiable bad-expect"; return ;;
      esac
      ;;
    stdout-contains:?*) ;;
    *) echo "unverifiable bad-expect"; return ;;
  esac

  # stdin is detached: the caller drives this from a `while read … < claims`
  # loop, and a command that reads stdin would otherwise swallow the remaining
  # claims. stderr is folded into the captured output, as before.
  out="$(bash -c "$cmd" 2>&1 </dev/null)"
  rc=$?

  case "$expect" in
    exit==*)
      if [ "$rc" -eq "${expect#exit==}" ]; then
        echo "confirmed exit=$rc"
      else
        echo "refuted exit=$rc"
      fi
      ;;
    stdout-contains:*)
      if grep -Fq -- "${expect#stdout-contains:}" <<< "$out"; then
        echo "confirmed stdout-match"
      else
        echo "refuted stdout-nomatch"
      fi
      ;;
  esac
}

# claims-compose: the referenced CLAIMS.a2ml must itself be all-confirmed.
# Arguments: $1 - path to another claims file
#            $2 - expect: all-confirmed
# Outputs:   "<verdict> <reason>" on stdout
v_compose() { # target(path) expect
  local path="${1-}" expect="${2-}"
  local depth="${DYADT_COMPOSE_DEPTH:-0}" max="${DYADT_COMPOSE_MAX_DEPTH:-8}"

  [ "$expect" = "all-confirmed" ] || { echo "unverifiable bad-expect"; return; }
  [ -f "$path" ] || { echo "refuted no-such-claims"; return; }

  # Guard against a claims file that (transitively) references itself: each
  # level re-executes this script, so unbounded nesting never terminates.
  case "$depth" in '' | *[!0-9]*) depth=0 ;; esac
  case "$max" in '' | *[!0-9]*) max=8 ;; esac
  if [ "$depth" -ge "$max" ]; then
    echo "unverifiable compose-depth-exceeded"
    return
  fi

  # The child runs strict (unverifiable is fatal) regardless of the parent's
  # DYADT_ALLOW_UNVERIFIABLE setting.
  if DYADT_ALLOW_UNVERIFIABLE=0 DYADT_COMPOSE_DEPTH=$((depth + 1)) \
    bash "$0" "$path" >/dev/null 2>&1 </dev/null; then
    echo "confirmed all-confirmed"
  else
    echo "refuted child-not-all-confirmed"
  fi
}

# --- dispatch ----------------------------------------------------------------
# Compatibility: verifier -> claim_classes it may discharge.
# compatible: succeeds iff the verifier is allowed to discharge the claim class.
# Arguments: $1 - verifier id, $2 - claim_class
compatible() { # verifier claim_class
  case "${1-}:${2-}" in
    git-diff:file-changed) return 0 ;;
    command-transcript:command-ran | command-transcript:test-passed) return 0 ;;
    ci-run:ci-green) return 0 ;;
    issue-state:issue-closed) return 0 ;;
    pr-state:pr-merged) return 0 ;;
    claims-compose:claim-of-claims) return 0 ;;
    manual:*) return 0 ;;
    *) return 1 ;;
  esac
}

# verify_one: route one claim to its verifier.
# Arguments: $1 id (accepted for interface symmetry; not consulted)
#            $2 claim_class  $3 target  $4 expect  $5 verifier
# Outputs:   "<verdict> <reason>" on stdout
verify_one() { # id class target expect verifier
  local id="${1-}" class="${2-}" target="${3-}" expect="${4-}" verifier="${5-}"
  local probe missing=""

  # licence/SPDX claims are always manual-only (estate policy). Matched
  # case-insensitively so LICENSE, Licence.txt, spdx:… cannot slip past.
  probe="$(printf '%s' "$class $target $expect" | tr '[:upper:]' '[:lower:]')"
  case "$probe" in
    *licence* | *license* | *spdx*) echo "unverifiable manual-only-licence"; return ;;
  esac

  # A missing required field is unverifiable — never something a verifier gets
  # to interpret (an empty command would otherwise "succeed").
  [ -n "$class" ] || missing="claim_class"
  [ -n "$target" ] || missing="${missing:+$missing,}target"
  [ -n "$expect" ] || missing="${missing:+$missing,}expect"
  [ -n "$verifier" ] || missing="${missing:+$missing,}verifier"
  if [ -n "$missing" ]; then
    echo "unverifiable missing-field:$missing"
    return
  fi

  if ! compatible "$verifier" "$class"; then
    echo "unverifiable incompatible-verifier"
    return
  fi

  case "$verifier" in
    git-diff) v_git_diff "$target" "$expect" ;;
    command-transcript) v_command "$target" "$expect" ;;
    claims-compose) v_compose "$target" "$expect" ;;
    ci-run | issue-state | pr-state) echo "unverifiable needs-network-verifier" ;;
    manual) echo "unverifiable manual-only" ;;
    *) echo "unverifiable unknown-verifier" ;;
  esac
}

# --- parse + run -------------------------------------------------------------
# field KEY: stdin is one `KEY = "value"` line; prints the bare value.
field() { sed -E "s/^$1 = \"//; s/\"$//"; }

id="" class="" target="" expect="" verifier="" statement=""
in_claim=0 # 1 once a [[claim]] header has been seen
n=0 confirmed=0 refuted=0 unver=0
declare -a rows=()

# emit: verify the claim accumulated so far and record exactly one verdict row.
# Every [[claim]] produces a row — including one with no id, which is reported
# as unverifiable rather than silently dropped.
emit() {
  [ "$in_claim" = 1 ] || return 0
  local res verdict reason label="$id" missing=""
  n=$((n + 1))

  [ -n "$id" ] || { missing="id"; label="?$n"; }
  [ -n "$statement" ] || missing="${missing:+$missing,}statement"
  if [ -n "$missing" ]; then
    res="unverifiable missing-field:$missing"
  else
    res="$(verify_one "$id" "$class" "$target" "$expect" "$verifier")"
  fi
  verdict="${res%% *}"
  reason="${res#* }"

  case "$verdict" in
    confirmed)
      confirmed=$((confirmed + 1))
      rows+=(" ✅ $label confirmed [$class] $reason")
      ;;
    refuted)
      refuted=$((refuted + 1))
      rows+=(" ❌ $label REFUTED [$class] $reason — statement: $statement")
      ;;
    *)
      unver=$((unver + 1))
      rows+=(" ⚠️ $label unverifiable [$class] $reason")
      ;;
  esac
}

# `|| [ -n "$raw" ]` keeps a final line that lacks a trailing newline.
while IFS= read -r raw || [ -n "$raw" ]; do
  raw="${raw%$'\r'}"                         # tolerate CRLF files
  line="${raw#"${raw%%[![:space:]]*}"}"      # strip leading whitespace
  line="${line%"${line##*[![:space:]]}"}"    # strip trailing whitespace
  case "$line" in
    '[[claim]]'*)
      emit
      in_claim=1
      id="" class="" target="" expect="" verifier="" statement=""
      ;;
    'id = "'*) id="$(printf '%s' "$line" | field id)" ;;
    'claim_class = "'*) class="$(printf '%s' "$line" | field claim_class)" ;;
    'target = "'*) target="$(printf '%s' "$line" | field target)" ;;
    'expect = "'*) expect="$(printf '%s' "$line" | field expect)" ;;
    'verifier = "'*) verifier="$(printf '%s' "$line" | field verifier)" ;;
    'statement = "'*) statement="$(printf '%s' "$line" | field statement)" ;;
  esac
done < "$CLAIMS"
emit

# Zero claims is a parse failure, not a pass: silence is not success.
if [ "$n" -eq 0 ]; then
  echo "error: no [[claim]] entries found in $CLAIMS" >&2
  exit 2
fi

echo "DYADT verify: $CLAIMS (base: ${BASE:-<none>})"
printf '%s\n' "${rows[@]}"
echo " ── $confirmed confirmed · $refuted refuted · $unver unverifiable (of $n claims)"

if [ "$refuted" -gt 0 ]; then exit 1; fi
if [ "$unver" -gt 0 ] && [ "${DYADT_ALLOW_UNVERIFIABLE:-0}" != "1" ]; then exit 1; fi
exit 0