From da86386a683fa97563127acc58ec0ff487a67947 Mon Sep 17 00:00:00 2001
From: legend5teve <2659618982@qq.com>
Date: Thu, 12 Mar 2026 01:34:05 -0600
Subject: [PATCH] feat: add planned stages for experiments

---
 README.md                                | 35 ++++++++++++++++++++++++
 scripts/run_stage0_pilot.sh              | 21 ++++++++++++++
 scripts/run_stage1_scenarios.sh          | 21 ++++++++++++++
 scripts/run_stage2_uncertainty.sh        | 19 +++++++++++++
 scripts/run_stage3_trust_messaging.sh    | 21 ++++++++++++++
 scripts/run_stage4_calibration.sh        | 18 ++++++++++++
 scripts/run_stage5_refine_calibration.sh | 19 +++++++++++++
 tests/test_experiment_stage_scripts.py   | 25 +++++++++++++++++
 8 files changed, 179 insertions(+)
 create mode 100755 scripts/run_stage0_pilot.sh
 create mode 100755 scripts/run_stage1_scenarios.sh
 create mode 100755 scripts/run_stage2_uncertainty.sh
 create mode 100755 scripts/run_stage3_trust_messaging.sh
 create mode 100755 scripts/run_stage4_calibration.sh
 create mode 100755 scripts/run_stage5_refine_calibration.sh
 create mode 100644 tests/test_experiment_stage_scripts.py

diff --git a/README.md b/README.md
index 40ffa64..d8f1a58 100644
--- a/README.md
+++ b/README.md
@@ -95,3 +95,38 @@ agentevac-study \
 ```
 
 This runs a grid search over information noise, delay, and trust parameters and fits results against a reference metrics file.
+
+## Experiment Workflow
+
+The repository includes ready-to-run shell scripts for a staged research workflow:
+
+```bash
+# Stage 0: pilot sanity check across the three scenarios, messaging on/off
+bash scripts/run_stage0_pilot.sh
+
+# Stage 1: main scenario comparison with moderate uncertainty
+bash scripts/run_stage1_scenarios.sh
+
+# Stage 2: uncertainty sensitivity (sigma × delay)
+bash scripts/run_stage2_uncertainty.sh
+
+# Stage 3: trust × messaging interaction study
+bash scripts/run_stage3_trust_messaging.sh
+
+# Stage 4: coarse calibration against a reference metrics file
+bash scripts/run_stage4_calibration.sh outputs/reference_metrics.json
+
+# Stage 5: local refinement around the best calibration region
+bash scripts/run_stage5_refine_calibration.sh
+```
+
+Stage purpose summary:
+
+- `run_stage0_pilot.sh`: quick behavioral sanity check before expensive sweeps
+- `run_stage1_scenarios.sh`: compare `no_notice`, `alert_guided`, `advice_guided`
+- `run_stage2_uncertainty.sh`: study `INFO_SIGMA` and `INFO_DELAY_S`
+- `run_stage3_trust_messaging.sh`: test interaction between trust and communication
+- `run_stage4_calibration.sh`: rank parameter sets against a reference outcome
+- `run_stage5_refine_calibration.sh`: refine around promising calibrated regions
+
+All scripts run headless with `sumo` and write outputs under `outputs/stage*/`.
diff --git a/scripts/run_stage0_pilot.sh b/scripts/run_stage0_pilot.sh
new file mode 100755
index 0000000..b726b7d
--- /dev/null
+++ b/scripts/run_stage0_pilot.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on a failed command, an unset variable, or a failed pipeline stage
+
+ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"  # repo root; empty CDPATH keeps cd from printing into the capture
+cd "$ROOT_DIR"  # the relative outputs/ paths below resolve against the repo root
+
+SEEDS=(12345 12346 12347)  # one run per seed, passed to each run as SUMO_SEED
+
+for messaging in on off; do  # Stage 0 pilot: every seed is run with messaging on and off
+  for seed in "${SEEDS[@]}"; do
+    echo "[STAGE0] messaging=${messaging} seed=${seed}"
+    SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+      --output-dir "outputs/stage0/pilot_msg_${messaging}_seed_${seed}" \
+      --sumo-binary sumo \
+      --sigma-values 40 \
+      --delay-values 0 \
+      --trust-values 0.5 \
+      --scenario-values no_notice,alert_guided,advice_guided \
+      --messaging "$messaging"
+  done
+done
diff --git a/scripts/run_stage1_scenarios.sh b/scripts/run_stage1_scenarios.sh
new file mode 100755
index 0000000..7be4f57
--- /dev/null
+++ b/scripts/run_stage1_scenarios.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on a failed command, an unset variable, or a failed pipeline stage
+
+ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"  # repo root; empty CDPATH keeps cd from printing into the capture
+cd "$ROOT_DIR"  # the relative outputs/ paths below resolve against the repo root
+
+SEEDS=(12345 12346 12347 12348 12349)  # one run per seed, passed to each run as SUMO_SEED
+
+for messaging in on off; do  # Stage 1: scenario comparison, every seed with messaging on and off
+  for seed in "${SEEDS[@]}"; do
+    echo "[STAGE1] messaging=${messaging} seed=${seed}"
+    SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+      --output-dir "outputs/stage1/scenarios_msg_${messaging}_seed_${seed}" \
+      --sumo-binary sumo \
+      --sigma-values 40 \
+      --delay-values 30 \
+      --trust-values 0.5 \
+      --scenario-values no_notice,alert_guided,advice_guided \
+      --messaging "$messaging"
+  done
+done
diff --git a/scripts/run_stage2_uncertainty.sh b/scripts/run_stage2_uncertainty.sh
new file mode 100755
index 0000000..545e49d
--- /dev/null
+++ b/scripts/run_stage2_uncertainty.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on a failed command, an unset variable, or a failed pipeline stage
+
+ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"  # repo root; empty CDPATH keeps cd from printing into the capture
+cd "$ROOT_DIR"  # the relative outputs/ paths below resolve against the repo root
+
+SEEDS=(12345 12346 12347 12348 12349)  # one run per seed, passed to each run as SUMO_SEED
+
+for seed in "${SEEDS[@]}"; do  # Stage 2: sigma and delay value lists per seed, messaging fixed to on
+  echo "[STAGE2] seed=${seed}"
+  SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+    --output-dir "outputs/stage2/uncertainty_seed_${seed}" \
+    --sumo-binary sumo \
+    --sigma-values 0,20,40,80 \
+    --delay-values 0,30,60 \
+    --trust-values 0.5 \
+    --scenario-values no_notice,alert_guided,advice_guided \
+    --messaging on
+done
diff --git a/scripts/run_stage3_trust_messaging.sh b/scripts/run_stage3_trust_messaging.sh
new file mode 100755
index 0000000..74cf954
--- /dev/null
+++ b/scripts/run_stage3_trust_messaging.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on a failed command, an unset variable, or a failed pipeline stage
+
+ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"  # repo root; empty CDPATH keeps cd from printing into the capture
+cd "$ROOT_DIR"  # the relative outputs/ paths below resolve against the repo root
+
+SEEDS=(12345 12346 12347 12348 12349)  # one run per seed, passed to each run as SUMO_SEED
+
+for messaging in on off; do  # Stage 3: trust value list, every seed with messaging on and off
+  for seed in "${SEEDS[@]}"; do
+    echo "[STAGE3] messaging=${messaging} seed=${seed}"
+    SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+      --output-dir "outputs/stage3/trust_msg_${messaging}_seed_${seed}" \
+      --sumo-binary sumo \
+      --sigma-values 40 \
+      --delay-values 30 \
+      --trust-values 0.0,0.25,0.5,0.75,1.0 \
+      --scenario-values no_notice,alert_guided,advice_guided \
+      --messaging "$messaging"
+  done
+done
diff --git a/scripts/run_stage4_calibration.sh b/scripts/run_stage4_calibration.sh
new file mode 100755
index 0000000..70a049a
--- /dev/null
+++ b/scripts/run_stage4_calibration.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on a failed command, an unset variable, or a failed pipeline stage
+
+ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"  # repo root; empty CDPATH keeps cd from printing into the capture
+cd "$ROOT_DIR"  # the relative paths below, including a relative reference path, resolve against the repo root
+
+REFERENCE_PATH="${1:-outputs/reference_metrics.json}"  # optional first argument; falls back to the default when omitted or empty
+
+python3 -m agentevac.analysis.study_runner \
+  --reference "$REFERENCE_PATH" \
+  --output-dir "outputs/stage4" \
+  --sumo-binary sumo \
+  --sigma-values 0,20,40,80 \
+  --delay-values 0,30,60,120 \
+  --trust-values 0.0,0.25,0.5,0.75,1.0 \
+  --scenario-values no_notice,alert_guided,advice_guided \
+  --messaging on \
+  --top-k 10
diff --git a/scripts/run_stage5_refine_calibration.sh b/scripts/run_stage5_refine_calibration.sh
new file mode 100755
index 0000000..7eafb5b
--- /dev/null
+++ b/scripts/run_stage5_refine_calibration.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+set -euo pipefail  # abort on a failed command, an unset variable, or a failed pipeline stage
+
+ROOT_DIR="$(CDPATH= cd -- "$(dirname -- "$0")/.." && pwd)"  # repo root; empty CDPATH keeps cd from printing into the capture
+cd "$ROOT_DIR"  # the relative outputs/ paths below resolve against the repo root
+
+SEEDS=(12345 12346 12347 12348 12349)  # one run per seed, passed to each run as SUMO_SEED
+
+for seed in "${SEEDS[@]}"; do  # Stage 5: narrower sigma/delay/trust lists, two guided scenarios only
+  echo "[STAGE5] seed=${seed}"
+  SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+    --output-dir "outputs/stage5/refined_seed_${seed}" \
+    --sumo-binary sumo \
+    --sigma-values 20,40,60 \
+    --delay-values 15,30,45 \
+    --trust-values 0.25,0.5,0.75 \
+    --scenario-values alert_guided,advice_guided \
+    --messaging on
+done
diff --git a/tests/test_experiment_stage_scripts.py b/tests/test_experiment_stage_scripts.py
new file mode 100644
index 0000000..0a7d1b4
--- /dev/null
+++ b/tests/test_experiment_stage_scripts.py
@@ -0,0 +1,25 @@
+"""Basic validation for staged experiment shell scripts."""
+
+from pathlib import Path
+import subprocess
+
+
+SCRIPT_NAMES = [  # file names of the staged experiment scripts, looked up under scripts/
+    "run_stage0_pilot.sh",
+    "run_stage1_scenarios.sh",
+    "run_stage2_uncertainty.sh",
+    "run_stage3_trust_messaging.sh",
+    "run_stage4_calibration.sh",
+    "run_stage5_refine_calibration.sh",
+]
+
+
+def test_stage_scripts_exist():
+    for name in SCRIPT_NAMES:
+        assert Path("scripts", name).exists()
+
+
+def test_stage_scripts_are_valid_bash():
+    for name in SCRIPT_NAMES:
+        path = Path("scripts", name)
+        subprocess.run(["bash", "-n", str(path)], check=True)