Add staged experiment workflow scripts.

This patch touches eight files:
  * README.md: adds an "Experiment Workflow" section listing the stage scripts.
  * scripts/run_stage0_pilot.sh ... scripts/run_stage5_refine_calibration.sh:
    six new executable bash scripts. Stages 0, 1, 2, 3 and 5 loop over fixed
    seeds and invoke `python3 -m agentevac.analysis.experiments`; stage 4
    invokes `python3 -m agentevac.analysis.study_runner` once.
  * tests/test_experiment_stage_scripts.py: checks that each script exists and
    passes `bash -n`.

NOTE(review): this patch was recovered from a whitespace-collapsed copy. The
indentation inside the new files and the blank context lines of the README
hunk were reconstructed to match the hunk line counts; confirm against the
original commit before relying on byte-exact content or the index hashes.

diff --git a/README.md b/README.md
index 1aec29e..64e88eb 100644
--- a/README.md
+++ b/README.md
@@ -96,6 +96,41 @@ agentevac-study \
 
 This runs a grid search over information noise, delay, and trust parameters and fits results against a reference metrics file.
 
+## Experiment Workflow
+
+The repository includes ready-to-run shell scripts for a staged research workflow:
+
+```bash
+# 1. Pilot sanity check across the three scenarios, messaging on/off
+bash scripts/run_stage0_pilot.sh
+
+# 2. Main scenario comparison with moderate uncertainty
+bash scripts/run_stage1_scenarios.sh
+
+# 3. Uncertainty sensitivity (sigma × delay)
+bash scripts/run_stage2_uncertainty.sh
+
+# 4. Trust × messaging interaction study
+bash scripts/run_stage3_trust_messaging.sh
+
+# 5. Coarse calibration against a reference metrics file
+bash scripts/run_stage4_calibration.sh outputs/reference_metrics.json
+
+# 6. Local refinement around the best calibration region
+bash scripts/run_stage5_refine_calibration.sh
+```
+
+Stage purpose summary:
+
+- `run_stage0_pilot.sh`: quick behavioral sanity check before expensive sweeps
+- `run_stage1_scenarios.sh`: compare `no_notice`, `alert_guided`, `advice_guided`
+- `run_stage2_uncertainty.sh`: study `INFO_SIGMA` and `INFO_DELAY_S`
+- `run_stage3_trust_messaging.sh`: test interaction between trust and communication
+- `run_stage4_calibration.sh`: rank parameter sets against a reference outcome
+- `run_stage5_refine_calibration.sh`: refine around promising calibrated regions
+
+All scripts run headless with `sumo` and write outputs under `outputs/stage*/`.
+
 ## Plotting Completed Runs
 
 Install the plotting dependency:
diff --git a/scripts/run_stage0_pilot.sh b/scripts/run_stage0_pilot.sh
new file mode 100755
index 0000000..b726b7d
--- /dev/null
+++ b/scripts/run_stage0_pilot.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+SEEDS=(12345 12346 12347)
+
+for messaging in on off; do
+  for seed in "${SEEDS[@]}"; do
+    echo "[STAGE0] messaging=${messaging} seed=${seed}"
+    SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+      --output-dir "outputs/stage0/pilot_msg_${messaging}_seed_${seed}" \
+      --sumo-binary sumo \
+      --sigma-values 40 \
+      --delay-values 0 \
+      --trust-values 0.5 \
+      --scenario-values no_notice,alert_guided,advice_guided \
+      --messaging "$messaging"
+  done
+done
diff --git a/scripts/run_stage1_scenarios.sh b/scripts/run_stage1_scenarios.sh
new file mode 100755
index 0000000..7be4f57
--- /dev/null
+++ b/scripts/run_stage1_scenarios.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+SEEDS=(12345 12346 12347 12348 12349)
+
+for messaging in on off; do
+  for seed in "${SEEDS[@]}"; do
+    echo "[STAGE1] messaging=${messaging} seed=${seed}"
+    SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+      --output-dir "outputs/stage1/scenarios_msg_${messaging}_seed_${seed}" \
+      --sumo-binary sumo \
+      --sigma-values 40 \
+      --delay-values 30 \
+      --trust-values 0.5 \
+      --scenario-values no_notice,alert_guided,advice_guided \
+      --messaging "$messaging"
+  done
+done
diff --git a/scripts/run_stage2_uncertainty.sh b/scripts/run_stage2_uncertainty.sh
new file mode 100755
index 0000000..545e49d
--- /dev/null
+++ b/scripts/run_stage2_uncertainty.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+SEEDS=(12345 12346 12347 12348 12349)
+
+for seed in "${SEEDS[@]}"; do
+  echo "[STAGE2] seed=${seed}"
+  SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+    --output-dir "outputs/stage2/uncertainty_seed_${seed}" \
+    --sumo-binary sumo \
+    --sigma-values 0,20,40,80 \
+    --delay-values 0,30,60 \
+    --trust-values 0.5 \
+    --scenario-values no_notice,alert_guided,advice_guided \
+    --messaging on
+done
diff --git a/scripts/run_stage3_trust_messaging.sh b/scripts/run_stage3_trust_messaging.sh
new file mode 100755
index 0000000..74cf954
--- /dev/null
+++ b/scripts/run_stage3_trust_messaging.sh
@@ -0,0 +1,21 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+SEEDS=(12345 12346 12347 12348 12349)
+
+for messaging in on off; do
+  for seed in "${SEEDS[@]}"; do
+    echo "[STAGE3] messaging=${messaging} seed=${seed}"
+    SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+      --output-dir "outputs/stage3/trust_msg_${messaging}_seed_${seed}" \
+      --sumo-binary sumo \
+      --sigma-values 40 \
+      --delay-values 30 \
+      --trust-values 0.0,0.25,0.5,0.75,1.0 \
+      --scenario-values no_notice,alert_guided,advice_guided \
+      --messaging "$messaging"
+  done
+done
diff --git a/scripts/run_stage4_calibration.sh b/scripts/run_stage4_calibration.sh
new file mode 100755
index 0000000..70a049a
--- /dev/null
+++ b/scripts/run_stage4_calibration.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+REFERENCE_PATH="${1:-outputs/reference_metrics.json}"
+
+python3 -m agentevac.analysis.study_runner \
+  --reference "$REFERENCE_PATH" \
+  --output-dir "outputs/stage4" \
+  --sumo-binary sumo \
+  --sigma-values 0,20,40,80 \
+  --delay-values 0,30,60,120 \
+  --trust-values 0.0,0.25,0.5,0.75,1.0 \
+  --scenario-values no_notice,alert_guided,advice_guided \
+  --messaging on \
+  --top-k 10
diff --git a/scripts/run_stage5_refine_calibration.sh b/scripts/run_stage5_refine_calibration.sh
new file mode 100755
index 0000000..7eafb5b
--- /dev/null
+++ b/scripts/run_stage5_refine_calibration.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$ROOT_DIR"
+
+SEEDS=(12345 12346 12347 12348 12349)
+
+for seed in "${SEEDS[@]}"; do
+  echo "[STAGE5] seed=${seed}"
+  SUMO_SEED="$seed" python3 -m agentevac.analysis.experiments \
+    --output-dir "outputs/stage5/refined_seed_${seed}" \
+    --sumo-binary sumo \
+    --sigma-values 20,40,60 \
+    --delay-values 15,30,45 \
+    --trust-values 0.25,0.5,0.75 \
+    --scenario-values alert_guided,advice_guided \
+    --messaging on
+done
diff --git a/tests/test_experiment_stage_scripts.py b/tests/test_experiment_stage_scripts.py
new file mode 100644
index 0000000..0a7d1b4
--- /dev/null
+++ b/tests/test_experiment_stage_scripts.py
@@ -0,0 +1,25 @@
+"""Basic validation for staged experiment shell scripts."""
+
+from pathlib import Path
+import subprocess
+
+
+SCRIPT_NAMES = [
+    "run_stage0_pilot.sh",
+    "run_stage1_scenarios.sh",
+    "run_stage2_uncertainty.sh",
+    "run_stage3_trust_messaging.sh",
+    "run_stage4_calibration.sh",
+    "run_stage5_refine_calibration.sh",
+]
+
+
+def test_stage_scripts_exist():
+    for name in SCRIPT_NAMES:
+        assert Path("scripts", name).exists()
+
+
+def test_stage_scripts_are_valid_bash():
+    for name in SCRIPT_NAMES:
+        path = Path("scripts", name)
+        subprocess.run(["bash", "-n", str(path)], check=True)