denoslab · stedrew · Mar 13, 2026 · Mar 12, 2026 · Mar 13, 2026
diff --git a/README.md b/README.md
@@ -96,6 +96,41 @@ agentevac-study \
 
 This runs a grid search over information noise, delay, and trust parameters and fits results against a reference metrics file.
 
+## Experiment Workflow
+
+The repository includes ready-to-run shell scripts for a staged research workflow:
+
+```bash
+# 1. Pilot sanity check across the three scenarios, messaging on/off
+bash scripts/run_stage0_pilot.sh
+
+# 2. Main scenario comparison with moderate uncertainty
+bash scripts/run_stage1_scenarios.sh
+
+# 3. Uncertainty sensitivity (sigma × delay)
+bash scripts/run_stage2_uncertainty.sh
+
+# 4. Trust × messaging interaction study
+bash scripts/run_stage3_trust_messaging.sh
+
+# 5. Coarse calibration against a reference metrics file (the path is optional; the one shown is the default)
+bash scripts/run_stage4_calibration.sh outputs/reference_metrics.json
+
+# 6. Local refinement around the best calibration region
+bash scripts/run_stage5_refine_calibration.sh
+```
+
+Stage purpose summary:
+
+- `run_stage0_pilot.sh`: quick behavioral sanity check before expensive sweeps
+- `run_stage1_scenarios.sh`: compare `no_notice`, `alert_guided`, `advice_guided`
+- `run_stage2_uncertainty.sh`: study `INFO_SIGMA` and `INFO_DELAY_S`
+- `run_stage3_trust_messaging.sh`: test interaction between trust and communication
+- `run_stage4_calibration.sh`: rank parameter sets against a reference outcome
+- `run_stage5_refine_calibration.sh`: refine around promising calibrated regions (the refinement grid is fixed in the script; adjust it to your Stage 4 results)
+
+All scripts run SUMO headless (they pass `--sumo-binary sumo`) and write their outputs under `outputs/stage*/`, relative to the repository root.
+
 ## Plotting Completed Runs
 
 Install the plotting dependency:

diff --git a/scripts/run_stage0_pilot.sh b/scripts/run_stage0_pilot.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Stage 0 pilot: quick sanity check of all three scenarios with messaging
+# switched on and off, repeated for a small set of seeds.
+#
+# Usage: bash scripts/run_stage0_pilot.sh
+# Output directories: outputs/stage0/pilot_msg_<on|off>_seed_<seed>
+set -euo pipefail
+
+# Work from the repository root (the parent of this script's directory) so
+# the relative outputs/ paths below always land in the same place.
+script_dir="$(dirname "$0")"
+repo_root="$(cd "${script_dir}/.." && pwd)"
+cd "$repo_root"
+
+readonly -a PILOT_SEEDS=(12345 12346 12347)
+
+# Flags that are identical for every pilot run.
+readonly -a FIXED_ARGS=(
+  --sumo-binary sumo
+  --sigma-values 40
+  --delay-values 0
+  --trust-values 0.5
+  --scenario-values no_notice,alert_guided,advice_guided
+)
+
+# Launch one experiment batch for the given messaging mode ($1) and seed ($2).
+# The seed is handed to the Python module through the SUMO_SEED environment
+# variable; the output directory name encodes both values.
+run_pilot() {
+  local mode=$1
+  local run_seed=$2
+  printf '%s\n' "[STAGE0] messaging=${mode} seed=${run_seed}"
+  SUMO_SEED="$run_seed" python3 -m agentevac.analysis.experiments \
+    --output-dir "outputs/stage0/pilot_msg_${mode}_seed_${run_seed}" \
+    "${FIXED_ARGS[@]}" \
+    --messaging "$mode"
+}
+
+for mode in on off; do
+  for run_seed in "${PILOT_SEEDS[@]}"; do
+    run_pilot "$mode" "$run_seed"
+  done
+done
diff --git a/scripts/run_stage1_scenarios.sh b/scripts/run_stage1_scenarios.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Stage 1: main comparison of the no_notice, alert_guided and advice_guided
+# scenarios (sigma 40, delay 30), with messaging on and off, over five seeds.
+#
+# Usage: bash scripts/run_stage1_scenarios.sh
+# Output directories: outputs/stage1/scenarios_msg_<on|off>_seed_<seed>
+set -euo pipefail
+
+# Work from the repository root (the parent of this script's directory) so
+# the relative outputs/ paths below always land in the same place.
+script_dir="$(dirname "$0")"
+repo_root="$(cd "${script_dir}/.." && pwd)"
+cd "$repo_root"
+
+readonly -a SCENARIO_SEEDS=(12345 12346 12347 12348 12349)
+
+# Flags that are identical for every run in this stage.
+readonly -a FIXED_ARGS=(
+  --sumo-binary sumo
+  --sigma-values 40
+  --delay-values 30
+  --trust-values 0.5
+  --scenario-values no_notice,alert_guided,advice_guided
+)
+
+# Launch one experiment batch for the given messaging mode ($1) and seed ($2).
+# The seed is handed to the Python module through the SUMO_SEED environment
+# variable; the output directory name encodes both values.
+run_scenarios() {
+  local mode=$1
+  local run_seed=$2
+  printf '%s\n' "[STAGE1] messaging=${mode} seed=${run_seed}"
+  SUMO_SEED="$run_seed" python3 -m agentevac.analysis.experiments \
+    --output-dir "outputs/stage1/scenarios_msg_${mode}_seed_${run_seed}" \
+    "${FIXED_ARGS[@]}" \
+    --messaging "$mode"
+}
+
+for mode in on off; do
+  for run_seed in "${SCENARIO_SEEDS[@]}"; do
+    run_scenarios "$mode" "$run_seed"
+  done
+done
diff --git a/scripts/run_stage2_uncertainty.sh b/scripts/run_stage2_uncertainty.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Stage 2: uncertainty sensitivity. Each seed is run once with the sigma and
+# delay value lists below, for all three scenarios with messaging on.
+#
+# Usage: bash scripts/run_stage2_uncertainty.sh
+# Output directories: outputs/stage2/uncertainty_seed_<seed>
+set -euo pipefail
+
+# Work from the repository root (the parent of this script's directory) so
+# the relative outputs/ paths below always land in the same place.
+script_dir="$(dirname "$0")"
+repo_root="$(cd "${script_dir}/.." && pwd)"
+cd "$repo_root"
+
+readonly -a SWEEP_SEEDS=(12345 12346 12347 12348 12349)
+
+# Flags shared by every run; only the seed and output directory vary.
+readonly -a FIXED_ARGS=(
+  --sumo-binary sumo
+  --sigma-values 0,20,40,80
+  --delay-values 0,30,60
+  --trust-values 0.5
+  --scenario-values no_notice,alert_guided,advice_guided
+  --messaging on
+)
+
+for run_seed in "${SWEEP_SEEDS[@]}"; do
+  printf '%s\n' "[STAGE2] seed=${run_seed}"
+  # The seed is handed to the Python module via the SUMO_SEED env variable.
+  SUMO_SEED="$run_seed" python3 -m agentevac.analysis.experiments \
+    --output-dir "outputs/stage2/uncertainty_seed_${run_seed}" \
+    "${FIXED_ARGS[@]}"
+done
diff --git a/scripts/run_stage3_trust_messaging.sh b/scripts/run_stage3_trust_messaging.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+# Stage 3: trust x messaging interaction. Runs the trust value list below with
+# messaging on and off, for all three scenarios, over five seeds.
+#
+# Usage: bash scripts/run_stage3_trust_messaging.sh
+# Output directories: outputs/stage3/trust_msg_<on|off>_seed_<seed>
+set -euo pipefail
+
+# Work from the repository root (the parent of this script's directory) so
+# the relative outputs/ paths below always land in the same place.
+script_dir="$(dirname "$0")"
+repo_root="$(cd "${script_dir}/.." && pwd)"
+cd "$repo_root"
+
+readonly -a TRUST_SEEDS=(12345 12346 12347 12348 12349)
+
+# Flags that are identical for every run in this stage.
+readonly -a FIXED_ARGS=(
+  --sumo-binary sumo
+  --sigma-values 40
+  --delay-values 30
+  --trust-values 0.0,0.25,0.5,0.75,1.0
+  --scenario-values no_notice,alert_guided,advice_guided
+)
+
+# Launch one experiment batch for the given messaging mode ($1) and seed ($2).
+# The seed is handed to the Python module through the SUMO_SEED environment
+# variable; the output directory name encodes both values.
+run_trust_batch() {
+  local mode=$1
+  local run_seed=$2
+  printf '%s\n' "[STAGE3] messaging=${mode} seed=${run_seed}"
+  SUMO_SEED="$run_seed" python3 -m agentevac.analysis.experiments \
+    --output-dir "outputs/stage3/trust_msg_${mode}_seed_${run_seed}" \
+    "${FIXED_ARGS[@]}" \
+    --messaging "$mode"
+}
+
+for mode in on off; do
+  for run_seed in "${TRUST_SEEDS[@]}"; do
+    run_trust_batch "$mode" "$run_seed"
+  done
+done
diff --git a/scripts/run_stage4_calibration.sh b/scripts/run_stage4_calibration.sh
@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# Stage 4: coarse calibration. Runs agentevac.analysis.study_runner with the
+# sigma/delay/trust value lists below and passes it a reference metrics file.
+#
+# Usage: bash scripts/run_stage4_calibration.sh [REFERENCE_JSON]
+#   REFERENCE_JSON  path to the reference metrics file
+#                   (default: outputs/reference_metrics.json in the repo).
+#                   A relative path is looked up under the repository root
+#                   first, then under the directory the script was called from.
+# Output directory: outputs/stage4
+set -euo pipefail
+
+# Remember where the caller was before changing directory, so that a
+# relative REFERENCE_JSON typed from elsewhere can still be found.
+CALLER_DIR="$PWD"
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+REFERENCE_PATH="${1:-outputs/reference_metrics.json}"
+
+# Relative paths keep their original meaning (relative to the repo root) when
+# that file exists; otherwise fall back to the caller's working directory.
+if [[ "$REFERENCE_PATH" != /* && ! -e "$REFERENCE_PATH" ]]; then
+  if [[ -e "${CALLER_DIR}/${REFERENCE_PATH}" ]]; then
+    REFERENCE_PATH="${CALLER_DIR}/${REFERENCE_PATH}"
+  fi
+fi
+
+# Fail before the study starts rather than part-way through it.
+if [[ ! -e "$REFERENCE_PATH" ]]; then
+  printf 'error: reference metrics file not found: %s\n' "$REFERENCE_PATH" >&2
+  printf 'usage: %s [REFERENCE_JSON]\n' "${0##*/}" >&2
+  exit 2
+fi
+
+python3 -m agentevac.analysis.study_runner \
+  --reference "$REFERENCE_PATH" \
+  --output-dir "outputs/stage4" \
+  --sumo-binary sumo \
+  --sigma-values 0,20,40,80 \
+  --delay-values 0,30,60,120 \
+  --trust-values 0.0,0.25,0.5,0.75,1.0 \
+  --scenario-values no_notice,alert_guided,advice_guided \
+  --messaging on \
+  --top-k 10
diff --git a/scripts/run_stage5_refine_calibration.sh b/scripts/run_stage5_refine_calibration.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+# Stage 5: local refinement. Each seed is run once with the fixed sigma, delay
+# and trust value lists below, for alert_guided and advice_guided, messaging on.
+#
+# Usage: bash scripts/run_stage5_refine_calibration.sh
+# Output directories: outputs/stage5/refined_seed_<seed>
+set -euo pipefail
+
+# Work from the repository root (the parent of this script's directory) so
+# the relative outputs/ paths below always land in the same place.
+script_dir="$(dirname "$0")"
+repo_root="$(cd "${script_dir}/.." && pwd)"
+cd "$repo_root"
+
+readonly -a REFINE_SEEDS=(12345 12346 12347 12348 12349)
+
+# Flags shared by every run; only the seed and output directory vary.
+readonly -a FIXED_ARGS=(
+  --sumo-binary sumo
+  --sigma-values 20,40,60
+  --delay-values 15,30,45
+  --trust-values 0.25,0.5,0.75
+  --scenario-values alert_guided,advice_guided
+  --messaging on
+)
+
+for run_seed in "${REFINE_SEEDS[@]}"; do
+  printf '%s\n' "[STAGE5] seed=${run_seed}"
+  # The seed is handed to the Python module via the SUMO_SEED env variable.
+  SUMO_SEED="$run_seed" python3 -m agentevac.analysis.experiments \
+    --output-dir "outputs/stage5/refined_seed_${run_seed}" \
+    "${FIXED_ARGS[@]}"
+done
diff --git a/tests/test_experiment_stage_scripts.py b/tests/test_experiment_stage_scripts.py
@@ -0,0 +1,40 @@
+"""Basic validation for staged experiment shell scripts.
+
+Paths are resolved relative to this file rather than the current working
+directory, so the checks behave the same wherever the test runner is started.
+"""
+
+from pathlib import Path
+import subprocess
+
+
+# Repository layout assumed here: <repo>/tests/<this file> and <repo>/scripts/.
+SCRIPTS_DIR = Path(__file__).resolve().parent.parent / "scripts"
+
+SCRIPT_NAMES = [
+    "run_stage0_pilot.sh",
+    "run_stage1_scenarios.sh",
+    "run_stage2_uncertainty.sh",
+    "run_stage3_trust_messaging.sh",
+    "run_stage4_calibration.sh",
+    "run_stage5_refine_calibration.sh",
+]
+
+
+def test_stage_scripts_exist():
+    """Every staged experiment script is present in the scripts directory."""
+    missing = [name for name in SCRIPT_NAMES if not (SCRIPTS_DIR / name).is_file()]
+    assert not missing, f"missing stage scripts in {SCRIPTS_DIR}: {missing}"
+
+
+def test_stage_scripts_are_valid_bash():
+    """Every staged script parses under ``bash -n`` (syntax check, no execution)."""
+    for name in SCRIPT_NAMES:
+        path = SCRIPTS_DIR / name
+        result = subprocess.run(
+            ["bash", "-n", str(path)],
+            capture_output=True,
+            text=True,
+            check=False,
+        )
+        assert result.returncode == 0, f"bash -n failed for {name}: {result.stderr}"