dtsong · dtsong · Mar 8, 2026 · Feb 23, 2026 · chatgpt-codex-connector · Feb 23, 2026
diff --git a/.governance-version b/.governance-version
@@ -1 +1 @@
-v1.3.0
+v1.4.1
diff --git a/pipeline/config/model-routing.yaml b/pipeline/config/model-routing.yaml
@@ -1,8 +1,4 @@
-# Model Routing Configuration
-# See specs/SKILL-MODEL-ROUTING-SPEC.md for full specification.
-spec_version: "1.4"
-
-# Default model preferences by skill classification
+spec_version: '1.4'
 defaults:
   coordinator:
     preferred: haiku
@@ -16,26 +12,23 @@ defaults:
     preferred: sonnet
     minimum: haiku
     reasoning_demand: medium
-
-# Budget zone thresholds (percentage of max_simultaneous_tokens)
 zones:
-  green: 0.70
-  yellow: 0.90
-  red: 1.00
-
-# Task type defaults
+  green: 0.7
+  yellow: 0.9
+  red: 1.0
 task_types:
   mechanical:
     preferred: haiku
-    description: "File tracing, pattern matching, structure validation"
+    description: File tracing, pattern matching, structure validation
   analytical:
     preferred: sonnet
-    description: "Classification, code review, gap analysis"
+    description: Classification, code review, gap analysis
   reasoning:
     preferred: opus
-    description: "Debugging, architecture, complex reasoning"
-
-# Tier-to-model mapping
+    description: >-
+      Tasks requiring adversarial reasoning, formal constraint satisfaction, or long-chain
+      causal reasoning across system boundaries — regardless of application domain (e.g.,
+      adversarial security analysis, formal verification, vulnerability chain synthesis)
 tiers:
   haiku:
     claude_code: claude-haiku-4-5-20251001
@@ -46,6 +39,4 @@ tiers:
   opus:
     claude_code: claude-opus-4-6
     cost_ratio: 25
-
-# Per-skill overrides (empty by default — suites populate as needed)
 overrides: {}
diff --git a/pipeline/config/security-suppressions.json b/pipeline/config/security-suppressions.json
@@ -1,5 +1,6 @@
 {
   "spec_version": "1.3",
   "suppressions": [],
-  "_comment": "Per-file security check suppressions. Each entry: {\"file\": \"path/to/file.md\", \"rule\": \"rule-id\", \"rationale\": \"why\"}"
+  "_comment": "Per-file security check suppressions. Each entry: {\"file\": \"path/to/file.md\", \"rule\": \"rule-id\", \"rationale\": \"why\"}",
+  "overrides": {}
 }
diff --git a/pipeline/hooks/check_references.py b/pipeline/hooks/check_references.py
@@ -63,6 +63,12 @@ def _looks_like_file_path(s):
     # Filter out things that are clearly not paths
     if " " in s:
         return False
+    # Filter out template placeholders like references/<name>.md
+    if "<" in s or ">" in s:
+        return False
+    # Filter out home-relative and variable-based paths
+    if s.startswith("~/") or s.startswith("$"):
+        return False
     return True
 
 

diff --git a/pipeline/scripts/analyze-patterns.py b/pipeline/scripts/analyze-patterns.py
diff --git a/pipeline/scripts/budget-report.py b/pipeline/scripts/budget-report.py
@@ -59,7 +59,7 @@ def get_budget_limits(rel_path, classification, budgets):
         word_key = classification + "_max_words"
         token_key = classification + "_max_tokens"
         if word_key in override:
-            return override[word_key], override.get(token_key, budgets.get(token_key))
+            return override[word_key], override[token_key]
 
     word_key = classification + "_max_words"
     token_key = classification + "_max_tokens"

diff --git a/pipeline/scripts/check-regressions.py b/pipeline/scripts/check-regressions.py
diff --git a/pipeline/scripts/context-load-analysis.py b/pipeline/scripts/context-load-analysis.py
diff --git a/pipeline/specs/SKILL-MODEL-ROUTING-SPEC.md b/pipeline/specs/SKILL-MODEL-ROUTING-SPEC.md
@@ -81,21 +81,21 @@ Map abstract tiers to concrete models per platform:
 
 tiers:
   haiku:
-    claude_code: claude-haiku-4-5
+    claude_code: claude-haiku-4-5-20251001
     codex: codex-spark
     description: "Mechanical tasks — file tracing, pattern matching, formatting"
     cost_ratio: 1          # Baseline
 
   sonnet:
     claude_code: claude-sonnet-4-6
     codex: gpt-5.3-codex
-    description: "Analytical tasks — classification, multi-factor decisions, standard coding"
-    cost_ratio: 5          # ~5x haiku (Sonnet 4.6: $3/$15 per MTok)
+    description: "Analytical tasks — classification, multi-factor decisions, standard coding; also standard STRIDE/threat analysis, architecture review, research synthesis (Sonnet 4.6 handles these with 1M context and strong long-horizon reasoning)"
+    cost_ratio: 5          # ~5x haiku (Sonnet 4.6: $3/$15 per MTok; 59% user preference over Opus 4.5)
 
   opus:
     claude_code: claude-opus-4-6
     codex: gpt-5.3-codex-xl    # hypothetical — map to best available
-    description: "Adversarial security analysis, formal verification, vulnerability chain synthesis"
+    description: "Tasks requiring adversarial reasoning, formal constraint satisfaction, or long-chain causal reasoning across system boundaries — regardless of application domain (e.g., adversarial security analysis, formal verification, vulnerability chain synthesis)"
     cost_ratio: 25         # ~25x haiku (Opus: $15/$75 per MTok)
 
 # Session defaults
@@ -295,6 +295,14 @@ Coordinators are already the entry point for skill suites. They're the natural
 place to make model routing decisions because they see the full request before
 dispatching to a specialist.
 
+**All coordinators should use `preferred: haiku`.** Coordinator routing is
+classification and dispatch — the complexity lies in the routing table design,
+not the model tier. This holds even for security-domain coordinators: a
+well-structured routing table with explicit ambiguity-triage logic achieves the
+same accuracy as a sonnet-preferred coordinator at 1/5th the cost. Coordinators
+fire on every request (the hot path); 5x overhead on every invocation is not
+justified for a routing-only role.
+
 Updated coordinator template:
 
 ```markdown
@@ -580,6 +588,8 @@ The `check_frontmatter.py` hook should validate:
 - `minimum` is ≤ `preferred` in tier order (haiku < sonnet < opus)
 - `reasoning_demand` is one of: low, medium, high, variable
 - `conditions` entries have `when` + either `downgrade_to` or `hold_at`
+- **`reasoning_demand: high` requires `preferred: opus`** — `preferred: sonnet` + `reasoning_demand: high` is an invalid combination. The degradation cascade holds `high` skills at opus under budget pressure, but if the skill is sonnet-preferred it never reaches opus, making `high` functionally dead code. Validators should reject this pairing.
+- `conditions` entries only support downgrade actions (`downgrade_to`, `hold_at`) — there is no `upgrade_to` action. Skills that need a higher model tier under some conditions should set `preferred` to that tier and use `conditions` to downgrade for simpler cases.
 
 Validation failures are **Warn** tier — model config is optional and advisory.
 
@@ -604,7 +614,7 @@ The per-skill token log format (§5.1) integrates with existing observability:
 
 tiers:
   haiku:
-    claude_code: claude-haiku-4-5
+    claude_code: claude-haiku-4-5-20251001
     cost_ratio: 1
   sonnet:
     claude_code: claude-sonnet-4-6