From da86386a683fa97563127acc58ec0ff487a67947 Mon Sep 17 00:00:00 2001
From: legend5teve <2659618982@qq.com>
Date: Thu, 12 Mar 2026 01:34:05 -0600
Subject: [PATCH] feat: add planned stages for experiments

---
Review notes (this area is not part of the commit message):
* tests/test_experiment_stage_scripts.py locates scripts/ relative to the
  test file, so the tests no longer depend on the working directory.
* scripts/run_stage4_calibration.sh exits early with a clear message when
  the reference metrics file is missing.
* README step comments are labelled by stage number to match the script
  names (stage0..stage5).
* The blob ids on the `index` lines of those three files were not
  recomputed; rerun `git format-patch` after applying to refresh them.

 README.md                                | 35 ++++++++++++++++++++++++
 scripts/run_stage0_pilot.sh              | 21 ++++++++++++++
 scripts/run_stage1_scenarios.sh          | 21 ++++++++++++++
 scripts/run_stage2_uncertainty.sh        | 19 +++++++++++++
 scripts/run_stage3_trust_messaging.sh    | 21 ++++++++++++++
 scripts/run_stage4_calibration.sh        | 24 ++++++++++++++++
 scripts/run_stage5_refine_calibration.sh | 19 +++++++++++++
 tests/test_experiment_stage_scripts.py   | 29 ++++++++++++++++++++
 8 files changed, 189 insertions(+)
 create mode 100755 scripts/run_stage0_pilot.sh
 create mode 100755 scripts/run_stage1_scenarios.sh
 create mode 100755 scripts/run_stage2_uncertainty.sh
 create mode 100755 scripts/run_stage3_trust_messaging.sh
 create mode 100755 scripts/run_stage4_calibration.sh
 create mode 100755 scripts/run_stage5_refine_calibration.sh
 create mode 100644 tests/test_experiment_stage_scripts.py

diff --git a/README.md b/README.md
index 40ffa64..d8f1a58 100644
--- a/README.md
+++ b/README.md
@@ -95,3 +95,38 @@ agentevac-study \
 ```
 
 This runs a grid search over information noise, delay, and trust parameters and fits results against a reference metrics file.
+
+## Experiment Workflow
+
+The repository includes ready-to-run shell scripts for a staged research workflow:
+
+```bash
+# Stage 0: pilot sanity check across the three scenarios, messaging on/off
+bash scripts/run_stage0_pilot.sh
+
+# Stage 1: main scenario comparison with moderate uncertainty
+bash scripts/run_stage1_scenarios.sh
+
+# Stage 2: uncertainty sensitivity (sigma × delay)
+bash scripts/run_stage2_uncertainty.sh
+
+# Stage 3: trust × messaging interaction study
+bash scripts/run_stage3_trust_messaging.sh
+
+# Stage 4: coarse calibration against a reference metrics file
+bash scripts/run_stage4_calibration.sh outputs/reference_metrics.json
+
+# Stage 5: local refinement around the best calibration region
+bash scripts/run_stage5_refine_calibration.sh
+```
+
+Stage purpose summary:
+
+- `run_stage0_pilot.sh`: quick behavioral sanity check before expensive sweeps
+- `run_stage1_scenarios.sh`: compare `no_notice`, `alert_guided`, `advice_guided`
+- `run_stage2_uncertainty.sh`: study `INFO_SIGMA` and `INFO_DELAY_S`
+- `run_stage3_trust_messaging.sh`: test interaction between trust and communication
+- `run_stage4_calibration.sh`: rank parameter sets against a reference outcome
+- `run_stage5_refine_calibration.sh`: refine around promising calibrated regions
+
+All scripts run headless with `sumo` and write outputs under `outputs/stage*/`.
diff --git a/scripts/run_stage0_pilot.sh b/scripts/run_stage0_pilot.sh
new file mode 100755
index 0000000..b726b7d
--- /dev/null
+++ b/scripts/run_stage0_pilot.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+SEEDS=(12345 12346 12347)
+
+for messaging in on off; do
+  for seed in "${SEEDS[@]}"; do
+    echo "[STAGE0] messaging=${messaging} seed=${seed}"
+    SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+      --output-dir "outputs/stage0/pilot_msg_${messaging}_seed_${seed}" \
+      --sumo-binary sumo \
+      --sigma-values 40 \
+      --delay-values 0 \
+      --trust-values 0.5 \
+      --scenario-values no_notice,alert_guided,advice_guided \
+      --messaging "$messaging"
+  done
+done
diff --git a/scripts/run_stage1_scenarios.sh b/scripts/run_stage1_scenarios.sh
new file mode 100755
index 0000000..7be4f57
--- /dev/null
+++ b/scripts/run_stage1_scenarios.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+SEEDS=(12345 12346 12347 12348 12349)
+
+for messaging in on off; do
+  for seed in "${SEEDS[@]}"; do
+    echo "[STAGE1] messaging=${messaging} seed=${seed}"
+    SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+      --output-dir "outputs/stage1/scenarios_msg_${messaging}_seed_${seed}" \
+      --sumo-binary sumo \
+      --sigma-values 40 \
+      --delay-values 30 \
+      --trust-values 0.5 \
+      --scenario-values no_notice,alert_guided,advice_guided \
+      --messaging "$messaging"
+  done
+done
diff --git a/scripts/run_stage2_uncertainty.sh b/scripts/run_stage2_uncertainty.sh
new file mode 100755
index 0000000..545e49d
--- /dev/null
+++ b/scripts/run_stage2_uncertainty.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+SEEDS=(12345 12346 12347 12348 12349)
+
+for seed in "${SEEDS[@]}"; do
+  echo "[STAGE2] seed=${seed}"
+  SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+    --output-dir "outputs/stage2/uncertainty_seed_${seed}" \
+    --sumo-binary sumo \
+    --sigma-values 0,20,40,80 \
+    --delay-values 0,30,60 \
+    --trust-values 0.5 \
+    --scenario-values no_notice,alert_guided,advice_guided \
+    --messaging on
+done
diff --git a/scripts/run_stage3_trust_messaging.sh b/scripts/run_stage3_trust_messaging.sh
new file mode 100755
index 0000000..74cf954
--- /dev/null
+++ b/scripts/run_stage3_trust_messaging.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+SEEDS=(12345 12346 12347 12348 12349)
+
+for messaging in on off; do
+  for seed in "${SEEDS[@]}"; do
+    echo "[STAGE3] messaging=${messaging} seed=${seed}"
+    SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+      --output-dir "outputs/stage3/trust_msg_${messaging}_seed_${seed}" \
+      --sumo-binary sumo \
+      --sigma-values 40 \
+      --delay-values 30 \
+      --trust-values 0.0,0.25,0.5,0.75,1.0 \
+      --scenario-values no_notice,alert_guided,advice_guided \
+      --messaging "$messaging"
+  done
+done
diff --git a/scripts/run_stage4_calibration.sh b/scripts/run_stage4_calibration.sh
new file mode 100755
index 0000000..70a049a
--- /dev/null
+++ b/scripts/run_stage4_calibration.sh
@@ -0,0 +1,24 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+REFERENCE_PATH="${1:-outputs/reference_metrics.json}"
+
+# Fail fast with a clear message; relative paths resolve against ROOT_DIR (cd above).
+if [[ ! -f "$REFERENCE_PATH" ]]; then
+  echo "[STAGE4] reference metrics file not found: ${REFERENCE_PATH}" >&2
+  exit 1
+fi
+
+python3 -m agentevac.analysis.study_runner \
+  --reference "$REFERENCE_PATH" \
+  --output-dir "outputs/stage4" \
+  --sumo-binary sumo \
+  --sigma-values 0,20,40,80 \
+  --delay-values 0,30,60,120 \
+  --trust-values 0.0,0.25,0.5,0.75,1.0 \
+  --scenario-values no_notice,alert_guided,advice_guided \
+  --messaging on \
+  --top-k 10
diff --git a/scripts/run_stage5_refine_calibration.sh b/scripts/run_stage5_refine_calibration.sh
new file mode 100755
index 0000000..7eafb5b
--- /dev/null
+++ b/scripts/run_stage5_refine_calibration.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+SEEDS=(12345 12346 12347 12348 12349)
+
+for seed in "${SEEDS[@]}"; do
+  echo "[STAGE5] seed=${seed}"
+  SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+    --output-dir "outputs/stage5/refined_seed_${seed}" \
+    --sumo-binary sumo \
+    --sigma-values 20,40,60 \
+    --delay-values 15,30,45 \
+    --trust-values 0.25,0.5,0.75 \
+    --scenario-values alert_guided,advice_guided \
+    --messaging on
+done
diff --git a/tests/test_experiment_stage_scripts.py b/tests/test_experiment_stage_scripts.py
new file mode 100644
index 0000000..0a7d1b4
--- /dev/null
+++ b/tests/test_experiment_stage_scripts.py
@@ -0,0 +1,29 @@
+"""Basic validation for staged experiment shell scripts."""
+
+from pathlib import Path
+import subprocess
+
+
+# Anchor on this file (tests/ -> repo root) so the tests pass from any working directory.
+SCRIPTS_DIR = Path(__file__).resolve().parents[1] / "scripts"
+
+SCRIPT_NAMES = [
+    "run_stage0_pilot.sh",
+    "run_stage1_scenarios.sh",
+    "run_stage2_uncertainty.sh",
+    "run_stage3_trust_messaging.sh",
+    "run_stage4_calibration.sh",
+    "run_stage5_refine_calibration.sh",
+]
+
+
+def test_stage_scripts_exist():
+    """Every staged experiment script is present under scripts/."""
+    for name in SCRIPT_NAMES:
+        assert (SCRIPTS_DIR / name).is_file(), f"missing script: {name}"
+
+
+def test_stage_scripts_are_valid_bash():
+    """Every script passes `bash -n`, which parses without executing."""
+    for name in SCRIPT_NAMES:
+        subprocess.run(["bash", "-n", str(SCRIPTS_DIR / name)], check=True)