From c157333caa33c638d4be18b79e0f342bfe2d77ee Mon Sep 17 00:00:00 2001 From: JTInventory Date: Wed, 24 Jun 2026 18:08:10 +0000 Subject: [PATCH] feat: add optional no-mistakes gate mode --- AGENTS.md | 32 +++++-- README.md | 2 +- bin/fm-brief.sh | 24 ++++- bin/fm-fleet-sync.sh | 6 +- bin/fm-home-seed.sh | 6 +- bin/fm-pr-check.sh | 53 ++++++++++- bin/fm-project-mode.sh | 50 ++++++----- bin/fm-spawn.sh | 23 +++-- docs/architecture.md | 3 +- docs/configuration.md | 1 + docs/scripts.md | 2 +- tests/fm-brief-nm-gate.test.sh | 66 ++++++++++++++ tests/fm-pr-check.test.sh | 101 +++++++++++++++++++++ tests/fm-project-mode.test.sh | 66 ++++++++++++++ tests/fm-secondmate-lifecycle-e2e.test.sh | 4 +- tests/fm-secondmate-safety.test.sh | 4 +- tests/fm-spawn-nm-gate.test.sh | 105 ++++++++++++++++++++++ 17 files changed, 493 insertions(+), 55 deletions(-) create mode 100755 tests/fm-brief-nm-gate.test.sh create mode 100755 tests/fm-pr-check.test.sh create mode 100755 tests/fm-project-mode.test.sh create mode 100755 tests/fm-spawn-nm-gate.test.sh diff --git a/AGENTS.md b/AGENTS.md index 696c8d4..dc2981f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -72,16 +72,16 @@ bin/ helper scripts, committed; read each script's header before config/crew-harness crewmate harness override; LOCAL, gitignored; absent or "default" = same as firstmate data/ personal fleet records; LOCAL, gitignored as a whole backlog.md task queue, dependencies, history - captain.md captain's curated personal preferences and working style; LOCAL, gitignored, and canonical even if harness memory mirrors it - projects.md thin fleet navigation registry; firstmate-private, parsed by fm-project-mode.sh (section 6) - secondmates.md secondmate routing table; firstmate-private, maintained by fm-home-seed.sh (section 6) + captain.md captain's curated personal preferences and working style - approval posture, communication style, release habits; LOCAL, gitignored; compact rewrite-and-prune counterpart to shared AGENTS.md; canonical harness-portable home, even if harness memory mirrors it as a recall cache + projects.md thin fleet navigation registry: one line per project under projects/ with name, delivery mode, optional "+yolo", optional "+nm-gate", and a one-line description. It is firstmate-private, not a project knowledge dump; fm-project-mode.sh parses it (section 6) + secondmates.md secondmate routing table: one line per persistent domain supervisor, with a natural-language scope, non-exclusive project clone list, and home path; fm-home-seed.sh maintains it and validates unique ids, unique homes, and non-overlapping home paths (section 6) /brief.md per-task crewmate brief, or per-secondmate charter brief when kind=secondmate /report.md scout task deliverable, written by the crewmate; survives teardown projects/ cloned repos; gitignored; READ-ONLY for you state/ volatile runtime signals; gitignored .status appended by crewmates: ": " lines .turn-ended touched by turn-end hooks - .meta written by fm-spawn: window=, worktree=, project=, harness=, kind=, mode=, yolo=; kind=secondmate also records home= and projects= (fm-pr-check appends pr=) + .meta written by fm-spawn: window=, worktree=, project=, harness=, kind=, mode=, yolo=, nm_gate=, nm_status=; kind=secondmate also records home= and projects= (fm-pr-check upserts pr= and pr_source=) .check.sh optional slow poll you write per task (e.g. merged-PR check) .wake-queue durable queued wakes: epochseqkindkeypayload .afk durable away-mode flag; present = sub-supervisor may inject escalations (set by /afk, cleared on user return) @@ -180,7 +180,7 @@ Every project in the fleet has one line: - [] - (added ) ``` -The registry line records the project name, delivery mode, optional `+yolo` posture, and one-line description. +The registry line records the project name, delivery mode, optional `+yolo` posture, optional `+nm-gate` delivery gate, and one-line description. Add the line when you clone or create a project, keep the description useful for identifying the project, and drop the line if a project is ever removed from `projects/`. Do not turn the registry into a knowledge dump. Durable descriptive detail belongs in the project's own `AGENTS.md`. @@ -236,10 +236,19 @@ Do not eagerly backfill every project. - `no-mistakes` (default; `[...]` may be omitted) - full pipeline -> PR -> captain merge. Highest assurance. - `direct-PR` - push + open a PR via `gh-axi`, no pipeline -> captain merge. +- `direct-PR +nm-gate` - direct-PR remains the base mode, but Firstmate may run no-mistakes as a post-scope ship gate after a worker stops. - `local-only` - local branch, no remote, no PR; firstmate reviews the diff, the captain approves, firstmate merges to local `main` (section 7). Orthogonal to mode is an optional `+yolo` flag (`[direct-PR +yolo]`), default off and **not recommended**: with `yolo` on, firstmate makes the approval decisions itself instead of asking the captain (section 7). When the captain adds a project without saying, default to `no-mistakes` with yolo off; only set a faster mode or `+yolo` on the captain's explicit say-so. +Also orthogonal is optional `+nm-gate`, currently intended for `direct-PR` projects that have no-mistakes available but should not use it as the base mode. +It never applies to scout/report tasks. +Workers must not self-run no-mistakes for `+nm-gate`. +They stop at `done: ready for Firstmate PR scope review`. +Firstmate reviews git status, intended PR scope, and generated operational data before the captain decides whether to run `git push no-mistakes `, `no-mistakes axi run`, or normal direct PR flow. +Generated operational data requires explicit captain approval before no-mistakes or PR scope. +Passing no-mistakes does not auto-chain into merge, teardown, service restart, or another task. + **Clone existing:** `git clone projects/`, add its registry line with the chosen mode, then initialize only if the mode is `no-mistakes`. **Create new:** for `no-mistakes` and `direct-PR` modes a new project needs a GitHub repo first (they push to an `origin` remote); a `local-only` project needs no remote at all - a purely local git repo is fine. @@ -289,7 +298,7 @@ When you create a new secondmate, hand its in-scope queued items off from the ma Then classify the shape: -- **Ship** (the default): the deliverable is a change to the project. It ships through the project's delivery mode: `no-mistakes`, `direct-PR`, or `local-only`. +- **Ship** (the default): the deliverable is a change to the project. It ships through the project's delivery mode: `no-mistakes`, `direct-PR`, optional `direct-PR +nm-gate`, or `local-only`. - **Scout:** the deliverable is knowledge - an investigation, a plan, a bug reproduction, an audit. It ends in a report at `data//report.md`, never a PR. When the captain asks "what's wrong", "how would we", or "find out why" about a project, that is a scout task; dispatch it instead of doing the digging yourself. Then classify readiness: @@ -318,7 +327,7 @@ bin/fm-spawn.sh =projects/ =projects/ [--scout] # batc Dispatch several tasks in one call by passing `id=repo` pairs instead of a single ` `; each pair is spawned through the same single-task path, a shared `--scout` applies to all, and the looping happens inside the script so you never hand-write a multi-task shell loop. If one pair fails, the rest still run and the batch exits non-zero. -The script resolves the harness (`fm-harness.sh crew`), owns the verified launch templates, resolves the project's delivery mode (`fm-project-mode.sh`) for ship/scout tasks, and records `harness=`, `kind=`, `mode=`, and `yolo=` in the task's meta; a non-flag third argument containing whitespace is treated as a raw launch command (only for verifying new adapters). +The script resolves the harness (`fm-harness.sh crew`), owns the verified launch templates, resolves the project's delivery mode (`fm-project-mode.sh`) for ship/scout tasks, and records `harness=`, `kind=`, `mode=`, `yolo=`, `nm_gate=`, and `nm_status=` in the task's meta; a non-flag third argument containing whitespace is treated as a raw launch command (only for verifying new adapters). For `kind=secondmate`, the same script launches in the registered or explicit firstmate home instead of running `treehouse get` for a project, records `home=` and `projects=`, and uses the charter brief as the launch prompt. For ship and scout tasks, the script creates the window (in your current tmux session, or a dedicated `firstmate` session when you are outside tmux), runs `treehouse get`, waits for the worktree subshell, asserts the resolved worktree is a genuine isolated worktree distinct from the primary checkout (aborting the spawn otherwise, to prevent the worktree tangle of section 8), installs the turn-end hook, records `state/.meta`, and launches the agent with the brief. @@ -340,6 +349,7 @@ A ship task's path from `done` to landed on `main` is set by the project's `mode - **no-mistakes** - the stages below as written: no-mistakes validation pipeline -> PR -> captain merge. - **direct-PR** - no pipeline. The crewmate pushes and opens the PR itself (its brief says so) and reports `done: PR `. Skip the Validate step and go straight to PR ready (run `fm-pr-check`, relay the PR). Teardown uses the normal pushed-branch check. +- **direct-PR +nm-gate** - no worker-run pipeline. The crewmate prepares a clean intended diff, runs targeted checks, reports recommended PR scope, and stops at `done: ready for Firstmate PR scope review`. Firstmate reviews status and scope, asks the captain before including generated operational data, then the captain decides whether to run the no-mistakes gate or use normal direct PR flow. - **local-only** - no remote, no PR. The crewmate stops at `done: ready in branch fm/`. Review the diff with `bin/fm-review-diff.sh `, relay a one-paragraph summary to the captain, and on approval run `bin/fm-merge-local.sh ` to fast-forward local `main` (it refuses anything but a clean fast-forward - if it does, have the crewmate rebase). No `fm-pr-check`. Then teardown, whose safety check requires the branch already merged into local `main`, OR the work pushed to any remote (a fork counts - relevant for upstream-contribution PRs on a local-only-registered project). When reviewing any crewmate branch diff, use `bin/fm-review-diff.sh ` rather than `git diff ...branch` directly. @@ -357,10 +367,16 @@ The no-mistakes pipeline fixes auto-fix findings on its own (inside its own work When it reports `needs-decision` (ask-user findings), relay the findings to the captain unless `yolo=on` permits routine approval on your judgment, then send the decision back as a short instruction (the crewmate responds via `no-mistakes axi respond`). Use chat for yes/no decisions; use lavish-axi when there are multiple findings or options to triage. +For `direct-PR +nm-gate`, do not send `/no-mistakes` or `$no-mistakes` to the worker. +Firstmate owns the gate decision after scope review. +Before the first real gate run in a worktree, inspect current no-mistakes help/status there. +Do not run the gate while the branch contains accidental broad generated churn. + ### PR ready For PR-based ship tasks, the ready signal depends on mode: `no-mistakes` reports `done: PR checks green` after CI is green, while `direct-PR` reports `done: PR ` after opening the PR. -Run `bin/fm-pr-check.sh ` - it records `pr=` in the task's meta and arms the watcher's merge poll. +Run `bin/fm-pr-check.sh [direct|no-mistakes] [nm-status]` - it records `pr=` and `pr_source=` in the task's meta and arms the watcher's merge poll. +Use `nm-status=passed` only when the no-mistakes gate actually passed or produced the PR; otherwise use the conservative default `pr_recorded`. Tell the captain: the PR's full URL (always the complete `https://...` link, never a bare `#number` - the captain's terminal makes a full URL clickable), a one-paragraph summary, and, for `no-mistakes`, the risk level it emitted. (The check contract, for any custom `state/.check.sh` you write yourself: print one line only when firstmate should wake, print nothing otherwise, and finish before `FM_CHECK_TIMEOUT`.) diff --git a/README.md b/README.md index 8d1a7d1..b16df8e 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ This is.. a directory that turns any agent into your firstmate, and you the capt - **A visible crew** - every crewmate works in its own tmux window you can watch or type into; the first mate reconciles. - **Disposable worktrees** - each task runs in a clean [treehouse](https://github.com/kunchenguid/treehouse) git worktree, so parallel work on one repo never collides. - **Two task shapes** - ship tasks deliver a change; scout tasks investigate, plan, reproduce, or audit and leave a report. -- **Explicit project modes** - each project ships via `no-mistakes`, `direct-PR`, or `local-only`, with an optional `+yolo` autonomy flag. +- **Explicit project modes** - each project ships via `no-mistakes`, `direct-PR`, or `local-only`, with optional `+yolo` and `+nm-gate` flags. - **Optional secondmates** - opt in to persistent domain supervisors that run from isolated firstmate homes with their own `FM_HOME`, state, projects, and session lock. - **Event-driven, zero-token supervision** - a bash watcher sleeps on the fleet and wakes the first mate only when something needs you. - **Guarded by construction** - the first mate is read-only over your projects outside clean default-branch refreshes, safe branch pruning, and approved `local-only` fast-forward merges; crewmates make every project change behind your merge approval. diff --git a/bin/fm-brief.sh b/bin/fm-brief.sh index 3acc082..56b5233 100755 --- a/bin/fm-brief.sh +++ b/bin/fm-brief.sh @@ -20,7 +20,8 @@ # (data/projects.md via fm-project-mode.sh; see AGENTS.md project management # and task lifecycle): # no-mistakes implement -> /no-mistakes pipeline -> PR -> captain merge (default) -# direct-PR implement -> push + open PR via gh-axi (no pipeline) -> captain merge +# direct-PR implement -> push + open PR via gh-axi (no pipeline) -> captain merge, +# unless +nm-gate makes Firstmate own post-scope validation # local-only implement on branch, stop and report "ready in branch" (no push/PR); # firstmate reviews, captain approves, firstmate merges to local main # Ship briefs begin with a worktree-isolation assertion before the branch step. @@ -154,15 +155,29 @@ fi # Ship task: shape Setup / Rule 1 / Definition of done by the project's delivery mode. # yolo does not affect the brief (it governs firstmate's approval behaviour), so discard it. -read -r MODE _ <\`, \`no-mistakes axi run\`, or normal direct PR flow. +EOF +) + else + RULE1='1. Never push to the default branch (push only your `fm/'"$ID"'` branch). Never merge a PR.' + DOD=$(cat </dev/null || echo "no-mistakes off") - mode=${mode_line%% *} + mode_line=$("$FM_ROOT/bin/fm-project-mode.sh" "$label" 2>/dev/null || echo "no-mistakes off off") + read -r mode _ _nm_gate <&2; return 1; } git -C "$src" rev-parse --is-inside-work-tree >/dev/null 2>&1 || { echo "error: project $project is not a git repo" >&2; return 1; } - read -r mode _ <&2; return 1; } git -C "$src" rev-parse --is-inside-work-tree >/dev/null 2>&1 || { echo "error: project $project is not a git repo" >&2; return 1; } - read -r mode _ < to state/.meta and arms the +# Record a PR-ready task: upserts pr= to state/.meta and arms the # watcher's merge poll by writing state/.check.sh, which prints one line iff # the PR is merged (the watcher's check contract: output = wake firstmate, # silence = keep sleeping). -# Usage: fm-pr-check.sh +# Usage: fm-pr-check.sh [direct|no-mistakes] [nm-status] +# nm-status is optional and may be pr_recorded|passed|failed|skipped. +# For no-mistakes PRs, the default is pr_recorded; pass "passed" only when the +# gate actually passed or produced the PR. set -eu SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -13,12 +16,54 @@ STATE="${FM_STATE_OVERRIDE:-$FM_HOME/state}" "$FM_ROOT/bin/fm-guard.sh" || true ID=$1 URL=$2 +SOURCE=${3:-direct} +NM_STATUS=${4:-} +case "$SOURCE" in + direct|no-mistakes) ;; + *) echo "error: pr source must be direct or no-mistakes" >&2; exit 1 ;; +esac +case "$NM_STATUS" in + ''|pr_recorded|passed|failed|skipped) ;; + *) echo "error: nm-status must be pr_recorded, passed, failed, or skipped" >&2; exit 1 ;; +esac META="$STATE/$ID.meta" -if [ -f "$META" ] && ! grep -qxF "pr=$URL" "$META"; then - echo "pr=$URL" >> "$META" +meta_upsert() { + local key=$1 value=$2 tmp + [ -f "$META" ] || return 0 + tmp=$(mktemp "$META.tmp.XXXXXX") + awk -v key="$key" -v value="$value" -F= ' + $1 == key { + if (!seen) { + print key "=" value + seen = 1 + } + next + } + { print } + END { + if (!seen) print key "=" value + } + ' "$META" > "$tmp" + mv "$tmp" "$META" +} + +meta_has() { + local key=$1 value=$2 + [ -f "$META" ] || return 1 + grep -qxF "$key=$value" "$META" +} + +if [ "$SOURCE" = no-mistakes ] && [ -z "$NM_STATUS" ]; then + NM_STATUS=pr_recorded +elif [ "$SOURCE" = direct ] && [ -z "$NM_STATUS" ] && meta_has nm_gate on; then + NM_STATUS=skipped fi +meta_upsert pr "$URL" +meta_upsert pr_source "$SOURCE" +[ -n "$NM_STATUS" ] && meta_upsert nm_status "$NM_STATUS" + cat > "$STATE/$ID.check.sh" </dev/null) [ "\$state" = "MERGED" ] && echo "merged" diff --git a/bin/fm-project-mode.sh b/bin/fm-project-mode.sh index 666292a..d5b55b0 100755 --- a/bin/fm-project-mode.sh +++ b/bin/fm-project-mode.sh @@ -1,12 +1,15 @@ #!/usr/bin/env bash -# Resolve a project's delivery mode and yolo flag from the data/projects.md registry. -# Prints two words to stdout: " " where mode is one of -# no-mistakes|direct-PR|local-only and yolo is on|off. +# Resolve a project's delivery mode, yolo flag, and optional no-mistakes gate flag +# from the data/projects.md registry. +# Prints three words to stdout: " " where mode is one of +# no-mistakes|direct-PR|local-only and yolo/nm_gate are on|off. # # Registry line format (data/projects.md): -# - - (added ) -> no-mistakes off (legacy default) -# - [] - (added ) -> off -# - [ +yolo] - (added ) -> on +# - - (added ) -> no-mistakes off off (legacy default) +# - [] - (added ) -> off off +# - [ +yolo] - (added ) -> on off +# - [ +nm-gate] - (added ) -> off on +# - [ +yolo +nm-gate] - (added ) -> on on # # mode = how a finished change reaches main: # no-mistakes full pipeline -> PR -> captain merge (default) @@ -15,9 +18,12 @@ # yolo (orthogonal) = when on, firstmate makes approval decisions itself (PR merges, # ask-user findings, local-only merge approval) without checking the captain - except # anything destructive/irreversible/security-sensitive, which still escalates. +# nm_gate (orthogonal) = when on, no-mistakes is available as a Firstmate-owned +# post-scope delivery gate. It is not the base delivery mode and does not apply +# to scout/report tasks. # -# An unknown/missing project or unknown mode falls back to "no-mistakes off" and warns -# to stderr, so a typo never silently drops the gate. +# An unknown/missing project or unknown mode falls back to "no-mistakes off off" +# and warns to stderr, so a typo never silently drops the default gate. # Usage: fm-project-mode.sh set -eu @@ -29,38 +35,42 @@ REG="$DATA/projects.md" NAME=${1:?usage: fm-project-mode.sh } if [ ! -f "$REG" ]; then - echo "warn: no registry at $REG; defaulting $NAME to no-mistakes off" >&2 - echo "no-mistakes off" + echo "warn: no registry at $REG; defaulting $NAME to no-mistakes off off" >&2 + echo "no-mistakes off off" exit 0 fi -# awk emits " " (one line) or nothing if the project is absent. +# awk emits " " (one line) or nothing if the project is absent. parsed=$(awk -v n="$NAME" ' $1=="-" && $2==n { - mode="no-mistakes"; yolo="off"; + mode="no-mistakes"; yolo="off"; nm_gate="off"; if ($3 ~ /^\[/) { s=""; for (i=3; i<=NF; i++) { s = s (s==""?"":" ") $i; if ($i ~ /\]$/) break } gsub(/^\[|\]$/, "", s); # strip the surrounding brackets k = split(s, a, " "); - if (a[1] != "" && a[1] != "+yolo") mode = a[1]; + if (a[1] != "" && a[1] != "+yolo" && a[1] != "+nm-gate") mode = a[1]; for (j=1; j<=k; j++) if (a[j]=="+yolo") yolo="on"; + for (j=1; j<=k; j++) if (a[j]=="+nm-gate") nm_gate="on"; } - print mode, yolo; exit + print mode, yolo, nm_gate; exit } ' "$REG") if [ -z "$parsed" ]; then - echo "warn: project \"$NAME\" not in registry; defaulting to no-mistakes off" >&2 - echo "no-mistakes off" + echo "warn: project \"$NAME\" not in registry; defaulting to no-mistakes off off" >&2 + echo "no-mistakes off off" exit 0 fi -mode=${parsed%% *} -yolo=${parsed##* } +set -- $parsed +mode=${1:-no-mistakes} +yolo=${2:-off} +nm_gate=${3:-off} case "$mode" in no-mistakes|direct-PR|local-only) ;; - *) echo "warn: unknown mode \"$mode\" for $NAME; defaulting to no-mistakes off" >&2; mode=no-mistakes; yolo=off ;; + *) echo "warn: unknown mode \"$mode\" for $NAME; defaulting to no-mistakes off off" >&2; mode=no-mistakes; yolo=off; nm_gate=off ;; esac case "$yolo" in on|off) ;; *) yolo=off ;; esac -echo "$mode $yolo" +case "$nm_gate" in on|off) ;; *) nm_gate=off ;; esac +echo "$mode $yolo $nm_gate" diff --git a/bin/fm-spawn.sh b/bin/fm-spawn.sh index 86bc38c..89a67b9 100755 --- a/bin/fm-spawn.sh +++ b/bin/fm-spawn.sh @@ -25,9 +25,9 @@ # __PIEXT__ absolute path to state/.pi-ext.ts (pi turn-end extension, # written by this script; outside the worktree to avoid pi's trust gate) # Per-harness turn-end hooks are installed automatically; some live outside the worktree. -# On success prints: spawned harness= kind= mode= yolo= window= worktree= -# mode/yolo are resolved per-project from data/projects.md for ship/scout tasks; -# secondmate spawns record mode=secondmate, yolo=off, home=, and projects=. +# On success prints: spawned harness= kind= mode= yolo= nm_gate= window= worktree= +# mode/yolo/nm_gate are resolved per-project from data/projects.md for ship/scout tasks; +# secondmate spawns record mode=secondmate, yolo=off, nm_gate=off, home=, and projects=. set -eu SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" @@ -425,20 +425,23 @@ EOF esac fi -# Per-project delivery mode + yolo flag (bin/fm-project-mode.sh; AGENTS.md project management and task lifecycle). +# Per-project delivery mode + yolo/no-mistakes-gate flags (bin/fm-project-mode.sh; AGENTS.md project management and task lifecycle). # Recorded in meta so fm-teardown's safety check and the validate/merge stages can # branch on them. Mode governs ship tasks; a scout's deliverable is a report, not a -# merge, so scout teardown ignores mode. +# merge, so scout teardown ignores mode and scouts never use the no-mistakes gate. SECONDMATE_PROJECTS= if [ "$KIND" = secondmate ]; then MODE=secondmate YOLO=off + NM_GATE=off SECONDMATE_PROJECTS=$(secondmate_registry_value "$ID" projects || true) else PROJ_NAME=$(basename "$PROJ_ABS") - read -r MODE YOLO < - ...` to lease a fresh firstmate worktree fo The lease is held under the secondmate id until explicit retirement or seed rollback returns it, so normal restarts do not free or recycle the home. Teardown of a leased home fails closed if `treehouse return` cannot release the lease; plain-clone homes with no treehouse pool slot are removed directly. Secondmate routes cover `no-mistakes` and `direct-PR` projects; `local-only` projects remain main-firstmate work. +Project registry lines may include optional `+yolo` and `+nm-gate` flags inside the bracketed mode, such as `[direct-PR +nm-gate]`. For `no-mistakes` projects, seeding initializes only projects newly cloned into a secondmate home and refuses to mutate a preexisting clone that is not already initialized. After creating a secondmate, move existing main-backlog items that you have judged in-scope with `fm-backlog-handoff.sh ...`; it is idempotent and refuses in-flight items or non-secondmate homes. Set `FM_SECONDMATE_CHARTER` to seed from inline charter text when no filled charter brief exists; set `FM_SECONDMATE_SCOPE` when the routing scope should differ from the charter text. diff --git a/docs/scripts.md b/docs/scripts.md index fbf67d1..115e32a 100644 --- a/docs/scripts.md +++ b/docs/scripts.md @@ -14,7 +14,7 @@ Each file also starts with a short header comment. | `fm-guard.sh` | Warn when the primary checkout is tangled, when queued wakes are pending, or when a stale or missing watcher needs a prominent banner | | `fm-home-seed.sh` | Lease/provision a secondmate home transactionally, clone projects, initialize gates, and maintain `data/secondmates.md` | | `fm-spawn.sh` | Spawn one task, several `id=repo` pairs, or a persistent secondmate with `--secondmate`; ship/scout spawns require an isolated treehouse worktree | -| `fm-project-mode.sh` | Resolve a project's delivery mode and `+yolo` flag from `data/projects.md` | +| `fm-project-mode.sh` | Resolve a project's delivery mode, `+yolo`, and `+nm-gate` flags from `data/projects.md` | | `fm-merge-local.sh` | Fast-forward a `local-only` project's local default branch after approval | | `fm-review-diff.sh` | Review a crewmate branch against the authoritative base, with optional `--stat` output | | `fm-watch-arm.sh` | Verified per-home watcher re-arm; reports `started`, `healthy`, or `FAILED`; `--restart` relaunches only this home's watcher | diff --git a/tests/fm-brief-nm-gate.test.sh b/tests/fm-brief-nm-gate.test.sh new file mode 100755 index 0000000..af50fcf --- /dev/null +++ b/tests/fm-brief-nm-gate.test.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +set -u + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +BRIEF="$ROOT/bin/fm-brief.sh" +TMP_ROOT=$(mktemp -d "${TMPDIR:-/tmp}/fm-brief-nm-gate-tests.XXXXXX") +trap 'rm -rf "$TMP_ROOT"' EXIT + +fail() { + printf 'not ok - %s\n' "$1" >&2 + exit 1 +} + +pass() { + printf 'ok - %s\n' "$1" +} + +make_home() { + local name=$1 line=$2 home + home="$TMP_ROOT/$name" + mkdir -p "$home/data" "$home/state" + printf '%s\n' "$line" > "$home/data/projects.md" + printf '%s\n' "$home" +} + +test_direct_pr_old_behavior_unchanged() { + local home brief + home=$(make_home old-direct '- app [direct-PR] - app (added 2026-06-24)') + FM_HOME="$home" "$BRIEF" task-old app >/dev/null || fail "old direct-PR brief failed" + brief="$home/data/task-old/brief.md" + + grep -F 'This project ships **direct-PR**' "$brief" >/dev/null || fail "old direct-PR heading changed" + grep -F 'push your branch and open a PR with `gh-axi`' "$brief" >/dev/null || fail "old direct-PR push/PR instruction missing" + grep -F 'ready for Firstmate PR scope review' "$brief" >/dev/null && fail "old direct-PR brief got nm-gate scope-review text" + pass "direct-PR old behavior remains unchanged" +} + +test_nm_gate_worker_stops_at_scope_review() { + local home brief + home=$(make_home gated '- app [direct-PR +nm-gate] - app (added 2026-06-24)') + FM_HOME="$home" "$BRIEF" task-gated app >/dev/null || fail "nm-gate brief failed" + brief="$home/data/task-gated/brief.md" + + grep -F 'This project ships **direct-PR +nm-gate**' "$brief" >/dev/null || fail "nm-gate heading missing" + grep -F 'ready for Firstmate PR scope review' "$brief" >/dev/null || fail "scope-review stop missing" + grep -F 'Commit only if the task explicitly authorizes commits.' "$brief" >/dev/null || fail "commit guard missing" + grep -F 'Do NOT run /no-mistakes. Do NOT push. Do NOT open a PR.' "$brief" >/dev/null || fail "worker no-push/no-pr/no-gate rule missing" + grep -F 'push your branch and open a PR with `gh-axi`' "$brief" >/dev/null && fail "nm-gate brief still tells worker to push/open PR" + pass "nm-gate worker brief stops at Firstmate scope review" +} + +test_scout_ignores_nm_gate() { + local home brief + home=$(make_home scout '- app [direct-PR +nm-gate] - app (added 2026-06-24)') + FM_HOME="$home" "$BRIEF" task-scout app --scout >/dev/null || fail "scout brief failed" + brief="$home/data/task-scout/brief.md" + + grep -F 'This is a SCOUT task' "$brief" >/dev/null || fail "scout brief missing scout contract" + grep -F 'no-mistakes' "$brief" >/dev/null && fail "scout brief mentioned no-mistakes" + grep -F 'ready for Firstmate PR scope review' "$brief" >/dev/null && fail "scout brief got nm-gate delivery text" + pass "scout briefs ignore nm-gate" +} + +test_direct_pr_old_behavior_unchanged +test_nm_gate_worker_stops_at_scope_review +test_scout_ignores_nm_gate diff --git a/tests/fm-pr-check.test.sh b/tests/fm-pr-check.test.sh new file mode 100755 index 0000000..370b4d2 --- /dev/null +++ b/tests/fm-pr-check.test.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +set -u + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +PR_CHECK="$ROOT/bin/fm-pr-check.sh" +TMP_ROOT=$(mktemp -d "${TMPDIR:-/tmp}/fm-pr-check-tests.XXXXXX") +trap 'rm -rf "$TMP_ROOT"' EXIT + +fail() { + printf 'not ok - %s\n' "$1" >&2 + exit 1 +} + +pass() { + printf 'ok - %s\n' "$1" +} + +make_home() { + local name=$1 home + home="$TMP_ROOT/$name" + mkdir -p "$home/state" + touch "$home/state/.last-watcher-beat" + printf '%s\n' "$home" +} + +count_key() { + local key=$1 file=$2 + grep -c "^$key=" "$file" 2>/dev/null || true +} + +test_pr_check_upserts_meta_fields() { + local home meta + home=$(make_home upsert) + meta="$home/state/task.meta" + cat > "$meta" </dev/null \ + || fail "fm-pr-check no-mistakes pr_recorded failed" + [ "$(count_key pr "$meta")" = 1 ] || fail "pr key duplicated" + [ "$(count_key pr_source "$meta")" = 1 ] || fail "pr_source key duplicated" + [ "$(count_key nm_status "$meta")" = 1 ] || fail "nm_status key duplicated" + grep -Fx 'pr=https://github.com/example/repo/pull/2' "$meta" >/dev/null || fail "pr not upserted" + grep -Fx 'pr_source=no-mistakes' "$meta" >/dev/null || fail "pr_source not upserted" + grep -Fx 'nm_status=pr_recorded' "$meta" >/dev/null || fail "conservative nm_status not recorded" + pass "fm-pr-check upserts PR metadata without duplicates" +} + +test_no_mistakes_passed_requires_explicit_status() { + local home meta + home=$(make_home passed) + meta="$home/state/task.meta" + cat > "$meta" </dev/null \ + || fail "fm-pr-check default no-mistakes failed" + grep -Fx 'nm_status=pr_recorded' "$meta" >/dev/null || fail "no-mistakes default should be pr_recorded" + grep -Fx 'nm_status=passed' "$meta" >/dev/null && fail "no-mistakes source alone marked passed" + + FM_HOME="$home" "$PR_CHECK" task https://github.com/example/repo/pull/3 no-mistakes passed >/dev/null \ + || fail "fm-pr-check explicit passed failed" + grep -Fx 'nm_status=passed' "$meta" >/dev/null || fail "explicit passed status not recorded" + pass "no-mistakes passed status must be explicit" +} + +test_check_script_only_wakes_on_merged_pr() { + local home check fakebin out + home=$(make_home no-auto-chain) + printf '%s\n' 'kind=ship' > "$home/state/task.meta" + FM_HOME="$home" "$PR_CHECK" task https://github.com/example/repo/pull/4 direct >/dev/null \ + || fail "fm-pr-check direct failed" + check="$home/state/task.check.sh" + [ -x "$check" ] || chmod +x "$check" + + fakebin="$home/fakebin" + mkdir -p "$fakebin" + cat > "$fakebin/gh" <<'SH' +#!/usr/bin/env bash +printf '%s\n' "${FM_FAKE_PR_STATE:-OPEN}" +SH + chmod +x "$fakebin/gh" + + out=$(PATH="$fakebin:$PATH" FM_FAKE_PR_STATE=OPEN bash "$check") + [ -z "$out" ] || fail "check script woke before merge: $out" + out=$(PATH="$fakebin:$PATH" FM_FAKE_PR_STATE=MERGED bash "$check") + [ "$out" = "merged" ] || fail "check script did not wake on merge: $out" + pass "fm-pr-check arms merge poll only, with no auto-chain" +} + +test_pr_check_upserts_meta_fields +test_no_mistakes_passed_requires_explicit_status +test_check_script_only_wakes_on_merged_pr diff --git a/tests/fm-project-mode.test.sh b/tests/fm-project-mode.test.sh new file mode 100755 index 0000000..c7871a1 --- /dev/null +++ b/tests/fm-project-mode.test.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +set -u + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +MODE="$ROOT/bin/fm-project-mode.sh" +TMP_ROOT=$(mktemp -d "${TMPDIR:-/tmp}/fm-project-mode-tests.XXXXXX") +trap 'rm -rf "$TMP_ROOT"' EXIT + +fail() { + printf 'not ok - %s\n' "$1" >&2 + exit 1 +} + +pass() { + printf 'ok - %s\n' "$1" +} + +run_mode() { + local home=$1 project=$2 + FM_HOME="$home" "$MODE" "$project" 2>/dev/null +} + +test_legacy_defaults_and_old_direct_pr() { + local home out + home="$TMP_ROOT/legacy" + mkdir -p "$home/data" + cat > "$home/data/projects.md" < "$home/data/projects.md" </dev/null || fail "registry validation failed after seed" diff --git a/tests/fm-secondmate-safety.test.sh b/tests/fm-secondmate-safety.test.sh index 905b0c8..2c93223 100755 --- a/tests/fm-secondmate-safety.test.sh +++ b/tests/fm-secondmate-safety.test.sh @@ -21,9 +21,9 @@ test_fm_home_parameterization() { printf '%s\n' '- app [local-only +yolo] - test app (added 2026-06-22)' > "$home_one/data/projects.md" out=$(FM_HOME="$home_one" "$ROOT/bin/fm-project-mode.sh" app) - [ "$out" = "local-only on" ] || fail "fm-project-mode did not read projects.md from FM_HOME" + [ "$out" = "local-only on off" ] || fail "fm-project-mode did not read projects.md from FM_HOME" out=$(FM_HOME="$home_two" "$ROOT/bin/fm-project-mode.sh" app 2>/dev/null) - [ "$out" = "no-mistakes off" ] || fail "fm-project-mode did not isolate missing registry by home" + [ "$out" = "no-mistakes off off" ] || fail "fm-project-mode did not isolate missing registry by home" FM_HOME="$home_one" "$ROOT/bin/fm-brief.sh" task-a app >/dev/null || fail "brief scaffold failed under FM_HOME" brief="$home_one/data/task-a/brief.md" diff --git a/tests/fm-spawn-nm-gate.test.sh b/tests/fm-spawn-nm-gate.test.sh new file mode 100755 index 0000000..f61fd17 --- /dev/null +++ b/tests/fm-spawn-nm-gate.test.sh @@ -0,0 +1,105 @@ +#!/usr/bin/env bash +set -u + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +SPAWN="$ROOT/bin/fm-spawn.sh" +TMP_ROOT=$(mktemp -d "${TMPDIR:-/tmp}/fm-spawn-nm-gate-tests.XXXXXX") +trap 'rm -rf "$TMP_ROOT"' EXIT + +fail() { + printf 'not ok - %s\n' "$1" >&2 + exit 1 +} + +pass() { + printf 'ok - %s\n' "$1" +} + +make_fake_tmux() { + local dir=$1 fakebin + fakebin="$dir/fakebin" + mkdir -p "$fakebin" + cat > "$fakebin/tmux" <<'SH' +#!/usr/bin/env bash +set -u +case "${1:-}" in + display-message) + for a in "$@"; do + case "$a" in + '#S') printf '%s\n' test-session; exit 0 ;; + '#{pane_current_path}') printf '%s\n' "$FM_FAKE_WT"; exit 0 ;; + esac + done + printf '%s\n' test-session + exit 0 ;; + list-windows|new-window|send-keys|has-session|new-session) exit 0 ;; +esac +exit 0 +SH + chmod +x "$fakebin/tmux" + printf '%s\n' "$fakebin" +} + +make_git_repo() { + local path=$1 + git init -q "$path" + git -C "$path" -c user.email=t@t -c user.name=t commit -q --allow-empty -m init +} + +make_case() { + local name=$1 line=$2 home project wt fakebin + home="$TMP_ROOT/$name/home" + project="$home/projects/app" + wt="$TMP_ROOT/$name/wt" + mkdir -p "$home/data/task/data" "$home/state" "$home/projects" "$project" "$wt" + mkdir -p "$home/data" + printf '%s\n' "$line" > "$home/data/projects.md" + make_git_repo "$project" + make_git_repo "$wt" + printf 'brief\n' > "$home/data/task/brief.md" + fakebin=$(make_fake_tmux "$TMP_ROOT/$name") + printf '%s\t%s\t%s\n' "$home" "$wt" "$fakebin" +} + +run_spawn_case() { + local home=$1 wt=$2 fakebin=$3 kind_arg=${4:-} + if [ -n "$kind_arg" ]; then + PATH="$fakebin:$PATH" TMUX=1 FM_FAKE_WT="$wt" FM_HOME="$home" FM_SPAWN_NO_GUARD=1 \ + "$SPAWN" task projects/app codex "$kind_arg" >/dev/null + else + PATH="$fakebin:$PATH" TMUX=1 FM_FAKE_WT="$wt" FM_HOME="$home" FM_SPAWN_NO_GUARD=1 \ + "$SPAWN" task projects/app codex >/dev/null + fi +} + +test_ship_records_nm_gate_pending_scope_review() { + local row home wt fakebin meta + row=$(make_case ship '- app [direct-PR +nm-gate] - app (added 2026-06-24)') + IFS=$'\t' read -r home wt fakebin </dev/null || fail "mode missing" + grep -Fx 'yolo=off' "$meta" >/dev/null || fail "yolo missing" + grep -Fx 'nm_gate=on' "$meta" >/dev/null || fail "ship nm_gate=on missing" + grep -Fx 'nm_status=pending_scope_review' "$meta" >/dev/null || fail "ship nm_status pending missing" + pass "ship spawn records nm-gate pending scope review" +} + +test_scout_records_nm_gate_off() { + local row home wt fakebin meta + row=$(make_case scout '- app [direct-PR +nm-gate] - app (added 2026-06-24)') + IFS=$'\t' read -r home wt fakebin </dev/null || fail "kind=scout missing" + grep -Fx 'nm_gate=off' "$meta" >/dev/null || fail "scout nm_gate should be off" + grep -Fx 'nm_status=not_applicable' "$meta" >/dev/null || fail "scout nm_status should be not_applicable" + pass "scout spawn ignores nm-gate" +} + +test_ship_records_nm_gate_pending_scope_review +test_scout_records_nm_gate_off