diff --git a/agentevac/simulation/main.py b/agentevac/simulation/main.py index 9569b3b..64131aa 100644 --- a/agentevac/simulation/main.py +++ b/agentevac/simulation/main.py @@ -92,6 +92,7 @@ summarize_neighborhood_observation, compute_social_departure_pressure, ) +from agentevac.utils.run_parameters import write_run_parameter_log from agentevac.utils.replay import RouteReplay # ---- OpenAI (LLM control) ---- @@ -249,6 +250,10 @@ def _parse_cli_args() -> argparse.Namespace: "--metrics-log-path", help="Override METRICS_LOG_PATH env var (timestamp is appended).", ) + parser.add_argument( + "--params-log-path", + help="Override PARAMS_LOG_PATH env var (companion run suffix is preserved).", + ) parser.add_argument("--overlay-max-label-chars", type=int, help="Max overlay label characters.") parser.add_argument("--overlay-poi-layer", type=int, help="POI layer for overlays.") parser.add_argument("--overlay-poi-offset-m", type=float, help="POI offset in meters.") @@ -321,6 +326,7 @@ def _float_from_env_or_cli(cli_value: Optional[float], env_key: str, default: fl if CLI_ARGS.metrics is not None: METRICS_ENABLED = (CLI_ARGS.metrics == "on") METRICS_LOG_PATH = CLI_ARGS.metrics_log_path or os.getenv("METRICS_LOG_PATH", "outputs/run_metrics.json") +PARAMS_LOG_PATH = CLI_ARGS.params_log_path or os.getenv("PARAMS_LOG_PATH", "outputs/run_params.json") WEB_DASHBOARD_ENABLED = _parse_bool(os.getenv("WEB_DASHBOARD_ENABLED", "0"), False) if CLI_ARGS.web_dashboard is not None: WEB_DASHBOARD_ENABLED = (CLI_ARGS.web_dashboard == "on") @@ -1311,6 +1317,61 @@ def cleanup(self, active_vehicle_ids: List[str]): } +def _run_parameter_payload() -> Dict[str, Any]: + """Build the persisted run-parameter snapshot used by post-run plotting tools.""" + return { + "run_mode": RUN_MODE, + "scenario": SCENARIO_MODE, + "sumo_binary": SUMO_BINARY, + "messaging_controls": { + "enabled": MESSAGING_ENABLED, + "max_message_chars": MAX_MESSAGE_CHARS, + "max_inbox_messages": MAX_INBOX_MESSAGES, + "max_sends_per_agent_per_round": MAX_SENDS_PER_AGENT_PER_ROUND, + "max_broadcasts_per_round": MAX_BROADCASTS_PER_ROUND, + "ttl_rounds": TTL_ROUNDS, + }, + "driver_briefing_thresholds": { + "margin_very_close_m": MARGIN_VERY_CLOSE_M, + "margin_near_m": MARGIN_NEAR_M, + "margin_buffered_m": MARGIN_BUFFERED_M, + "risk_density_low": RISK_DENSITY_LOW, + "risk_density_medium": RISK_DENSITY_MEDIUM, + "risk_density_high": RISK_DENSITY_HIGH, + "delay_fast_ratio": DELAY_FAST_RATIO, + "delay_moderate_ratio": DELAY_MODERATE_RATIO, + "delay_heavy_ratio": DELAY_HEAVY_RATIO, + "caution_min_margin_m": CAUTION_MIN_MARGIN_M, + "recommended_min_margin_m": RECOMMENDED_MIN_MARGIN_M, + }, + "cognition": { + "info_sigma": INFO_SIGMA, + "info_delay_s": INFO_DELAY_S, + "social_signal_max_messages": SOCIAL_SIGNAL_MAX_MESSAGES, + "theta_trust": DEFAULT_THETA_TRUST, + "belief_inertia": BELIEF_INERTIA, + }, + "departure": { + "theta_r": DEFAULT_THETA_R, + "theta_u": DEFAULT_THETA_U, + "gamma": DEFAULT_GAMMA, + }, + "utility": { + "lambda_e": DEFAULT_LAMBDA_E, + "lambda_t": DEFAULT_LAMBDA_T, + }, + "neighbor_observation": { + "scope": NEIGHBOR_SCOPE, + "window_s": DEFAULT_NEIGHBOR_WINDOW_S, + "social_recent_weight": DEFAULT_SOCIAL_RECENT_WEIGHT, + "social_total_weight": DEFAULT_SOCIAL_TOTAL_WEIGHT, + "social_trigger": DEFAULT_SOCIAL_TRIGGER, + "social_min_danger": DEFAULT_SOCIAL_MIN_DANGER, + "max_system_observations": MAX_SYSTEM_OBSERVATIONS, + }, + } + + # ========================= # Step 4: Define SUMO configuration # ========================= @@ -1330,6 +1391,11 @@ def cleanup(self, active_vehicle_ids: List[str]): replay = RouteReplay(RUN_MODE, REPLAY_LOG_PATH) events = LiveEventStream(EVENTS_ENABLED, EVENTS_LOG_PATH, EVENTS_STDOUT) metrics = RunMetricsCollector(METRICS_ENABLED, METRICS_LOG_PATH, RUN_MODE) +params_log_path = write_run_parameter_log( + PARAMS_LOG_PATH, + _run_parameter_payload(), + reference_path=metrics.path or events.path or replay.path, +) dashboard = WebDashboard( enabled=WEB_DASHBOARD_ENABLED, host=WEB_DASHBOARD_HOST, @@ -1357,6 +1423,7 @@ def cleanup(self, active_vehicle_ids: List[str]): print(f"[EVENTS] enabled={EVENTS_ENABLED} path={events.path} stdout={EVENTS_STDOUT}") if metrics.path: print(f"[METRICS] enabled={METRICS_ENABLED} path={metrics.path}") +print(f"[RUN_PARAMS] path={params_log_path}") print( f"[WEB_DASHBOARD] enabled={dashboard.enabled} host={WEB_DASHBOARD_HOST} " f"port={WEB_DASHBOARD_PORT} max_events={WEB_DASHBOARD_MAX_EVENTS}" diff --git a/agentevac/utils/run_parameters.py b/agentevac/utils/run_parameters.py new file mode 100644 index 0000000..b7801de --- /dev/null +++ b/agentevac/utils/run_parameters.py @@ -0,0 +1,79 @@ +"""Helpers for recording and locating per-run parameter snapshots.""" + +from __future__ import annotations + +import json +import time +from pathlib import Path +from typing import Any, Mapping, Optional + +_REFERENCE_PREFIXES = ( + "run_params_", + "run_metrics_", + "metrics_", + "events_", + "llm_routes_", + "routes_", +) + + +def reference_suffix(reference_path: str | Path) -> str: + """Return the variable suffix portion of a run artifact filename. + + Examples: + ``run_metrics_20260311_012202.json`` -> ``20260311_012202`` + ``metrics_sigma-40_20260311_012202.json`` -> ``sigma-40_20260311_012202`` + """ + stem = Path(reference_path).stem + for prefix in _REFERENCE_PREFIXES: + if stem.startswith(prefix): + suffix = stem[len(prefix):] + if suffix: + return suffix + return stem + + +def build_parameter_log_path(base_path: str, *, reference_path: Optional[str | Path] = None) -> str: + """Build a parameter-log path, preserving a companion artifact suffix when possible.""" + base = Path(base_path) + ext = base.suffix or ".json" + stem = base.stem if base.suffix else base.name + + if reference_path: + suffix = reference_suffix(reference_path) + candidate = base.with_name(f"{stem}_{suffix}{ext}") + idx = 1 + while candidate.exists(): + candidate = base.with_name(f"{stem}_{suffix}_{idx:02d}{ext}") + idx += 1 + return str(candidate) + + ts = time.strftime("%Y%m%d_%H%M%S") + candidate = base.with_name(f"{stem}_{ts}{ext}") + idx = 1 + while candidate.exists(): + candidate = base.with_name(f"{stem}_{ts}_{idx:02d}{ext}") + idx += 1 + return str(candidate) + + +def write_run_parameter_log( + base_path: str, + payload: Mapping[str, Any], + *, + reference_path: Optional[str | Path] = None, +) -> str: + """Write one JSON parameter snapshot to disk and return its path.""" + target = Path(build_parameter_log_path(base_path, reference_path=reference_path)) + target.parent.mkdir(parents=True, exist_ok=True) + with target.open("w", encoding="utf-8") as fh: + json.dump(dict(payload), fh, ensure_ascii=False, indent=2, sort_keys=True) + fh.write("\n") + return str(target) + + +def companion_parameter_path(reference_path: str | Path, *, base_name: str = "run_params") -> Path: + """Derive the expected companion parameter-log path for a run artifact.""" + ref = Path(reference_path) + suffix = reference_suffix(ref) + return ref.with_name(f"{base_name}_{suffix}.json") diff --git a/scripts/_plot_common.py b/scripts/_plot_common.py index 9f837ae..df2a2f4 100644 --- a/scripts/_plot_common.py +++ b/scripts/_plot_common.py @@ -7,6 +7,8 @@ from pathlib import Path from typing import Any, Iterable, List +from agentevac.utils.run_parameters import companion_parameter_path + def newest_file(pattern: str) -> Path: """Return the newest file matching ``pattern``. @@ -30,6 +32,19 @@ def resolve_input(path_arg: str | None, pattern: str) -> Path: return newest_file(pattern) +def resolve_optional_run_params(path_arg: str | None, reference_path: Path | None) -> Path | None: + """Resolve an explicit or companion run-parameter log path if available.""" + if path_arg: + path = Path(path_arg) + if not path.exists(): + raise FileNotFoundError(f"Input file does not exist: {path}") + return path + if reference_path is None: + return None + candidate = companion_parameter_path(reference_path) + return candidate if candidate.exists() else None + + def load_json(path: Path) -> Any: """Load a JSON document from ``path``.""" with path.open("r", encoding="utf-8") as fh: diff --git a/scripts/plot_agent_communication.py b/scripts/plot_agent_communication.py index 6474639..1932cf1 100644 --- a/scripts/plot_agent_communication.py +++ b/scripts/plot_agent_communication.py @@ -9,9 +9,25 @@ from typing import Any try: - from scripts._plot_common import ensure_output_path, load_jsonl, require_matplotlib, resolve_input, top_items + from scripts._plot_common import ( + ensure_output_path, + load_json, + load_jsonl, + require_matplotlib, + resolve_input, + resolve_optional_run_params, + top_items, + ) except ModuleNotFoundError: - from _plot_common import ensure_output_path, load_jsonl, require_matplotlib, resolve_input, top_items + from _plot_common import ( + ensure_output_path, + load_json, + load_jsonl, + require_matplotlib, + resolve_input, + resolve_optional_run_params, + top_items, + ) def _parse_args() -> argparse.Namespace: @@ -26,6 +42,10 @@ def _parse_args() -> argparse.Namespace: "--dialogs", help="Path to a *.dialogs.csv file. Defaults to the newest outputs/*.dialogs.csv.", ) + parser.add_argument( + "--params", + help="Optional companion run_params JSON path. Defaults to the matching run_params_.json when present.", + ) parser.add_argument( "--out", help="Output PNG path. Defaults to .communication.png.", @@ -156,6 +176,24 @@ def _plot_dialog_modes(ax, dialog_rows: list[dict[str, str]]) -> None: ax2.set_ylabel("Average Response Length (chars)") +def _messaging_summary(params: dict | None) -> str | None: + """Format messaging anti-bloat controls for the dashboard footer.""" + if not params: + return None + messaging = params.get("messaging_controls") or {} + if not messaging: + return None + return ( + "Messaging controls: " + f"enabled={messaging.get('enabled', '?')} " + f"max_chars={messaging.get('max_message_chars', '?')} " + f"max_inbox={messaging.get('max_inbox_messages', '?')} " + f"max_sends={messaging.get('max_sends_per_agent_per_round', '?')} " + f"max_broadcasts={messaging.get('max_broadcasts_per_round', '?')} " + f"ttl_rounds={messaging.get('ttl_rounds', '?')}" + ) + + def plot_agent_communication( *, events_path: Path, @@ -163,10 +201,12 @@ def plot_agent_communication( out_path: Path, show: bool, top_n: int, + params_path: Path | None = None, ) -> None: plt = require_matplotlib() event_rows = load_jsonl(events_path) dialog_rows = _load_dialog_rows(dialogs_path) + params = load_json(params_path) if params_path else None sender_counts: dict[str, int] = {} recipient_counts: dict[str, int] = {} @@ -202,10 +242,17 @@ def plot_agent_communication( _plot_round_series(axes[1, 0], event_rows) _plot_dialog_modes(axes[1, 1], dialog_rows) - fig.tight_layout(rect=(0, 0, 1, 0.95)) + footer = _messaging_summary(params) + rect_bottom = 0.04 if footer else 0.0 + if footer: + fig.text(0.02, 0.012, footer, ha="left", va="bottom", fontsize=8) + + fig.tight_layout(rect=(0, rect_bottom, 1, 0.95)) fig.savefig(out_path, dpi=160, bbox_inches="tight") print(f"[PLOT] events={events_path}") print(f"[PLOT] dialogs={dialogs_path}") + if params_path: + print(f"[PLOT] params={params_path}") print(f"[PLOT] output={out_path}") if show: plt.show() @@ -216,6 +263,7 @@ def main() -> None: args = _parse_args() events_path = resolve_input(args.events, "outputs/events_*.jsonl") dialogs_path = resolve_input(args.dialogs, "outputs/*.dialogs.csv") + params_path = resolve_optional_run_params(args.params, events_path) out_path = ensure_output_path(events_path, args.out, suffix="communication") plot_agent_communication( events_path=events_path, @@ -223,6 +271,7 @@ def main() -> None: out_path=out_path, show=args.show, top_n=args.top_n, + params_path=params_path, ) diff --git a/scripts/plot_all_run_artifacts.py b/scripts/plot_all_run_artifacts.py index e6003ef..f9da937 100644 --- a/scripts/plot_all_run_artifacts.py +++ b/scripts/plot_all_run_artifacts.py @@ -33,6 +33,7 @@ def _parse_args() -> argparse.Namespace: parser.add_argument("--events", help="Explicit events JSONL path.") parser.add_argument("--replay", help="Explicit llm_routes JSONL path.") parser.add_argument("--dialogs", help="Explicit dialogs CSV path.") + parser.add_argument("--params", help="Explicit run_params JSON path.") parser.add_argument( "--results-json", help="Optional experiment_results.json to also generate the multi-run comparison figure.", @@ -61,7 +62,7 @@ def _resolve_run_id(args: argparse.Namespace) -> str: """Resolve the run ID from CLI args or the newest events file.""" if args.run_id: return str(args.run_id) - for path_arg in (args.events, args.metrics, args.replay, args.dialogs): + for path_arg in (args.events, args.metrics, args.replay, args.dialogs, args.params): if path_arg: match = re.search(r"(\d{8}_\d{6})", Path(path_arg).name) if match: @@ -77,6 +78,7 @@ def _resolve_paths(args: argparse.Namespace, run_id: str) -> dict[str, Path | No events = _maybe_path(args.events) replay = _maybe_path(args.replay) dialogs = _maybe_path(args.dialogs) + params = _maybe_path(args.params) if metrics is None: candidate = Path(f"outputs/run_metrics_{run_id}.json") @@ -90,12 +92,16 @@ def _resolve_paths(args: argparse.Namespace, run_id: str) -> dict[str, Path | No if dialogs is None: candidate = Path(f"outputs/llm_routes_{run_id}.dialogs.csv") dialogs = candidate if candidate.exists() else newest_file("outputs/*.dialogs.csv") + if params is None: + candidate = Path(f"outputs/run_params_{run_id}.json") + params = candidate if candidate.exists() else None return { "metrics": metrics, "events": events, "replay": replay, "dialogs": dialogs, + "params": params, } @@ -112,6 +118,7 @@ def main() -> None: events_path = paths["events"] replay_path = paths["replay"] dialogs_path = paths["dialogs"] + params_path = paths["params"] assert metrics_path is not None assert events_path is not None assert dialogs_path is not None @@ -121,6 +128,7 @@ def main() -> None: out_path=out_dir / "run_metrics.dashboard.png", show=args.show, top_n=args.top_n, + params_path=params_path, ) plot_timeline( events_path, @@ -135,6 +143,7 @@ def main() -> None: out_path=out_dir / "agent_communication.png", show=args.show, top_n=args.top_n, + params_path=params_path, ) if replay_path is not None: plot_agent_round_timeline( @@ -175,6 +184,8 @@ def main() -> None: if replay_path: print(f"[PLOT] replay={replay_path}") print(f"[PLOT] dialogs={dialogs_path}") + if params_path: + print(f"[PLOT] params={params_path}") if comparison_source: print(f"[PLOT] comparison_source={comparison_source}") diff --git a/scripts/plot_experiment_comparison.py b/scripts/plot_experiment_comparison.py index e63ba50..c66f795 100644 --- a/scripts/plot_experiment_comparison.py +++ b/scripts/plot_experiment_comparison.py @@ -8,9 +8,9 @@ from typing import Any try: - from scripts._plot_common import ensure_output_path, load_json, require_matplotlib + from scripts._plot_common import ensure_output_path, load_json, require_matplotlib, resolve_optional_run_params except ModuleNotFoundError: - from _plot_common import ensure_output_path, load_json, require_matplotlib + from _plot_common import ensure_output_path, load_json, require_matplotlib, resolve_optional_run_params def _parse_args() -> argparse.Namespace: @@ -57,6 +57,22 @@ def _metrics_row(metrics: dict[str, Any]) -> dict[str, float]: } +def _param_metadata(path: Path) -> dict[str, Any]: + """Load companion run parameters for plots that only have KPI JSON files.""" + params_path = resolve_optional_run_params(None, path) + if params_path is None: + return {} + payload = load_json(params_path) + cognition = payload.get("cognition") or {} + return { + "scenario": str(payload.get("scenario", "unknown")), + "info_sigma": _safe_float(cognition.get("info_sigma")), + "info_delay_s": _safe_float(cognition.get("info_delay_s")), + "theta_trust": _safe_float(cognition.get("theta_trust")), + "params_path": str(params_path), + } + + def load_cases(results_json: Path | None, metrics_glob: str) -> tuple[list[dict[str, Any]], Path]: rows: list[dict[str, Any]] = [] if results_json is not None: @@ -72,12 +88,13 @@ def load_cases(results_json: Path | None, metrics_glob: str) -> tuple[list[dict[ continue metrics = load_json(path) case = item.get("case") or {} + params_meta = _param_metadata(path) row = { "label": str(item.get("case_id") or path.stem), - "scenario": str(case.get("scenario", "unknown")), - "info_sigma": _safe_float(case.get("info_sigma")), - "info_delay_s": _safe_float(case.get("info_delay_s")), - "theta_trust": _safe_float(case.get("theta_trust")), + "scenario": str(case.get("scenario", params_meta.get("scenario", "unknown"))), + "info_sigma": _safe_float(case.get("info_sigma", params_meta.get("info_sigma"))), + "info_delay_s": _safe_float(case.get("info_delay_s", params_meta.get("info_delay_s"))), + "theta_trust": _safe_float(case.get("theta_trust", params_meta.get("theta_trust"))), "metrics_path": str(path), } row.update(_metrics_row(metrics)) @@ -89,12 +106,13 @@ def load_cases(results_json: Path | None, metrics_glob: str) -> tuple[list[dict[ raise SystemExit(f"No metrics files match pattern: {metrics_glob}") for path in matches: metrics = load_json(path) + params_meta = _param_metadata(path) row = { "label": path.stem, - "scenario": "unknown", - "info_sigma": 0.0, - "info_delay_s": 0.0, - "theta_trust": 0.0, + "scenario": str(params_meta.get("scenario", "unknown")), + "info_sigma": _safe_float(params_meta.get("info_sigma")), + "info_delay_s": _safe_float(params_meta.get("info_delay_s")), + "theta_trust": _safe_float(params_meta.get("theta_trust")), "metrics_path": str(path), } row.update(_metrics_row(metrics)) diff --git a/scripts/plot_run_metrics.py b/scripts/plot_run_metrics.py index d09638e..8982bdb 100644 --- a/scripts/plot_run_metrics.py +++ b/scripts/plot_run_metrics.py @@ -7,12 +7,27 @@ from pathlib import Path try: - from scripts._plot_common import ensure_output_path, load_json, require_matplotlib, resolve_input, top_items + from scripts._plot_common import ( + ensure_output_path, + load_json, + require_matplotlib, + resolve_input, + resolve_optional_run_params, + top_items, + ) except ModuleNotFoundError: - from _plot_common import ensure_output_path, load_json, require_matplotlib, resolve_input, top_items + from _plot_common import ( + ensure_output_path, + load_json, + require_matplotlib, + resolve_input, + resolve_optional_run_params, + top_items, + ) def _parse_args() -> argparse.Namespace: + """Parse CLI arguments for the run-metrics dashboard.""" parser = argparse.ArgumentParser( description="Visualize one run_metrics_*.json file as a 2x2 dashboard." ) @@ -20,6 +35,10 @@ def _parse_args() -> argparse.Namespace: "--metrics", help="Path to a metrics JSON file. Defaults to the newest outputs/run_metrics_*.json.", ) + parser.add_argument( + "--params", + help="Optional companion run_params JSON path. Defaults to the matching run_params_.json when present.", + ) parser.add_argument( "--out", help="Output PNG path. Defaults to .dashboard.png.", @@ -39,6 +58,7 @@ def _parse_args() -> argparse.Namespace: def _draw_or_empty(ax, items: list[tuple[str, float]], title: str, ylabel: str, color: str, *, highest_first: bool = True): + """Draw a bar panel, or a centered placeholder if no rows are available.""" if not items: ax.text(0.5, 0.5, "No data", ha="center", va="center", fontsize=11) ax.set_title(title) @@ -56,58 +76,146 @@ def _draw_or_empty(ax, items: list[tuple[str, float]], title: str, ylabel: str, ax.set_ylabel(ylabel) -def plot_metrics_dashboard(metrics_path: Path, *, out_path: Path, show: bool, top_n: int) -> None: +def _kpi_specs(metrics: dict) -> list[dict[str, object]]: + """Build the four top-level KPI descriptors used in the dashboard header panel.""" + return [ + { + "title": "Departure variance", + "value": float(metrics.get("departure_time_variability", 0.0)), + "ylabel": "Seconds^2", + "color": "#4C78A8", + "fmt": "{:.3f}", + }, + { + "title": "Route entropy", + "value": float(metrics.get("route_choice_entropy", 0.0)), + "ylabel": "Entropy (nats)", + "color": "#F58518", + "fmt": "{:.3f}", + }, + { + "title": "Hazard exposure", + "value": float(metrics.get("average_hazard_exposure", {}).get("global_average", 0.0)), + "ylabel": "Average risk score", + "color": "#E45756", + "fmt": "{:.3f}", + }, + { + "title": "Avg travel time", + "value": float(metrics.get("average_travel_time", {}).get("average", 0.0)), + "ylabel": "Seconds", + "color": "#54A24B", + "fmt": "{:.2f}", + }, + ] + + +def _plot_kpi_grid(fig, slot, metrics: dict) -> None: + """Render the KPI summary as four mini subplots with independent y scales.""" + kpi_grid = slot.subgridspec(2, 2, wspace=0.35, hspace=0.45) + for idx, spec in enumerate(_kpi_specs(metrics)): + ax = fig.add_subplot(kpi_grid[idx // 2, idx % 2]) + value = float(spec["value"]) + ymax = max(1.0, value * 1.15) if value >= 0.0 else max(1.0, abs(value) * 1.15) + ax.bar([0], [value], color=str(spec["color"]), width=0.5) + ax.set_title(str(spec["title"]), fontsize=10) + ax.set_ylabel(str(spec["ylabel"]), fontsize=9) + ax.set_xticks([]) + ax.set_ylim(min(0.0, value * 1.1), ymax) + ax.grid(axis="y", linestyle=":", alpha=0.35) + label = str(spec["fmt"]).format(value) + text_y = value if value > 0.0 else ymax * 0.04 + va = "bottom" + if value < 0.0: + text_y = value + va = "top" + ax.text(0, text_y, label, ha="center", va=va, fontsize=10) + + +def _briefing_summary(params: dict | None) -> str | None: + """Format driver-briefing thresholds for the dashboard footer.""" + if not params: + return None + briefing = params.get("driver_briefing_thresholds") or {} + if not briefing: + return None + return ( + "Briefing thresholds: " + f"margin_m={briefing.get('margin_very_close_m', '?')}/" + f"{briefing.get('margin_near_m', '?')}/" + f"{briefing.get('margin_buffered_m', '?')} " + f"risk_density={briefing.get('risk_density_low', '?')}/" + f"{briefing.get('risk_density_medium', '?')}/" + f"{briefing.get('risk_density_high', '?')} " + f"delay_ratio={briefing.get('delay_fast_ratio', '?')}/" + f"{briefing.get('delay_moderate_ratio', '?')}/" + f"{briefing.get('delay_heavy_ratio', '?')} " + f"advisory_margin_m={briefing.get('caution_min_margin_m', '?')}/" + f"{briefing.get('recommended_min_margin_m', '?')}" + ) + + +def plot_metrics_dashboard( + metrics_path: Path, + *, + out_path: Path, + show: bool, + top_n: int, + params_path: Path | None = None, +) -> None: + """Render the run-metrics dashboard and save it to ``out_path``.""" plt = require_matplotlib() metrics = load_json(metrics_path) - - kpis = { - "Departure variance": float(metrics.get("departure_time_variability", 0.0)), - "Route entropy": float(metrics.get("route_choice_entropy", 0.0)), - "Hazard exposure": float(metrics.get("average_hazard_exposure", {}).get("global_average", 0.0)), - "Avg travel time": float(metrics.get("average_travel_time", {}).get("average", 0.0)), - } + params = load_json(params_path) if params_path else None exposure = metrics.get("average_hazard_exposure", {}).get("per_agent_average", {}) or {} travel = metrics.get("average_travel_time", {}).get("per_agent", {}) or {} instability = metrics.get("decision_instability", {}).get("per_agent_changes", {}) or {} - fig, axes = plt.subplots(2, 2, figsize=(14, 10)) + fig = plt.figure(figsize=(14, 10)) + grid = fig.add_gridspec(2, 2, wspace=0.28, hspace=0.3) fig.suptitle( f"AgentEvac Run Metrics\n{metrics_path.name} | mode={metrics.get('run_mode', 'unknown')} " f"| departed={metrics.get('departed_agents', 0)} | arrived={metrics.get('arrived_agents', 0)}", fontsize=14, ) - axes[0, 0].bar(range(len(kpis)), list(kpis.values()), color=["#4C78A8", "#F58518", "#E45756", "#54A24B"]) - axes[0, 0].set_xticks(range(len(kpis))) - axes[0, 0].set_xticklabels(list(kpis.keys()), rotation=20, ha="right") - axes[0, 0].set_title("Run KPI Summary") - axes[0, 0].set_ylabel("Value") + _plot_kpi_grid(fig, grid[0, 0], metrics) + ax_travel = fig.add_subplot(grid[0, 1]) + ax_exposure = fig.add_subplot(grid[1, 0]) + ax_instability = fig.add_subplot(grid[1, 1]) _draw_or_empty( - axes[0, 1], + ax_travel, top_items(travel, top_n), f"Per-Agent Travel Time (top {top_n})", "Seconds", "#4C78A8", ) _draw_or_empty( - axes[1, 0], + ax_exposure, top_items(exposure, top_n), f"Per-Agent Hazard Exposure (top {top_n})", "Average Risk Score", "#E45756", ) _draw_or_empty( - axes[1, 1], + ax_instability, top_items({k: float(v) for k, v in instability.items()}, top_n), f"Per-Agent Decision Instability (top {top_n})", "Choice Changes", "#72B7B2", ) - fig.tight_layout(rect=(0, 0, 1, 0.95)) + footer = _briefing_summary(params) + rect_bottom = 0.04 if footer else 0.0 + if footer: + fig.text(0.02, 0.012, footer, ha="left", va="bottom", fontsize=8) + + fig.tight_layout(rect=(0, rect_bottom, 1, 0.95)) fig.savefig(out_path, dpi=160, bbox_inches="tight") print(f"[PLOT] metrics={metrics_path}") + if params_path: + print(f"[PLOT] params={params_path}") print(f"[PLOT] output={out_path}") if show: plt.show() @@ -115,10 +223,18 @@ def plot_metrics_dashboard(metrics_path: Path, *, out_path: Path, show: bool, to def main() -> None: + """CLI entry point for the run-metrics dashboard.""" args = _parse_args() metrics_path = resolve_input(args.metrics, "outputs/run_metrics_*.json") + params_path = resolve_optional_run_params(args.params, metrics_path) out_path = ensure_output_path(metrics_path, args.out, suffix="dashboard") - plot_metrics_dashboard(metrics_path, out_path=out_path, show=args.show, top_n=args.top_n) + plot_metrics_dashboard( + metrics_path, + out_path=out_path, + show=args.show, + top_n=args.top_n, + params_path=params_path, + ) if __name__ == "__main__": diff --git a/tests/test_plot_agent_communication.py b/tests/test_plot_agent_communication.py new file mode 100644 index 0000000..4716545 --- /dev/null +++ b/tests/test_plot_agent_communication.py @@ -0,0 +1,26 @@ +"""Unit tests for scripts.plot_agent_communication.""" + +from scripts.plot_agent_communication import _messaging_summary + + +class TestMessagingSummary: + def test_formats_messaging_controls(self): + summary = _messaging_summary( + { + "messaging_controls": { + "enabled": True, + "max_message_chars": 400, + "max_inbox_messages": 20, + "max_sends_per_agent_per_round": 3, + "max_broadcasts_per_round": 20, + "ttl_rounds": 10, + } + } + ) + assert summary is not None + assert "Messaging controls:" in summary + assert "max_chars=400" in summary + assert "ttl_rounds=10" in summary + + def test_returns_none_without_messaging_payload(self): + assert _messaging_summary({}) is None diff --git a/tests/test_plot_all_run_artifacts.py b/tests/test_plot_all_run_artifacts.py index b5c0f0b..70085f4 100644 --- a/tests/test_plot_all_run_artifacts.py +++ b/tests/test_plot_all_run_artifacts.py @@ -14,6 +14,7 @@ def test_prefers_explicit_run_id(self): metrics=None, replay=None, dialogs=None, + params=None, ) assert _resolve_run_id(args) == "20260309_030340" @@ -24,6 +25,7 @@ def test_extracts_run_id_from_explicit_path(self): metrics=None, replay=None, dialogs=None, + params=None, ) assert _resolve_run_id(args) == "20260309_030340" @@ -40,17 +42,20 @@ def test_prefers_matching_run_id_files(self, tmp_path, monkeypatch): "step,time_s,veh_id,control_mode,model,system_prompt,user_prompt,response_text,parsed_json,error\n", encoding="utf-8", ) + (out / "run_params_20260309_030340.json").write_text("{}", encoding="utf-8") args = Namespace( metrics=None, events=None, replay=None, dialogs=None, + params=None, ) paths = _resolve_paths(args, "20260309_030340") assert paths["metrics"] == out / "run_metrics_20260309_030340.json" assert paths["events"] == out / "events_20260309_030340.jsonl" assert paths["replay"] == out / "llm_routes_20260309_030340.jsonl" assert paths["dialogs"] == out / "llm_routes_20260309_030340.dialogs.csv" + assert paths["params"] == out / "run_params_20260309_030340.json" def test_missing_replay_returns_none(self, tmp_path, monkeypatch): monkeypatch.chdir(tmp_path) @@ -67,6 +72,8 @@ def test_missing_replay_returns_none(self, tmp_path, monkeypatch): events=None, replay=None, dialogs=None, + params=None, ) paths = _resolve_paths(args, "20260309_030340") assert paths["replay"] is None + assert paths["params"] is None diff --git a/tests/test_plot_experiment_comparison.py b/tests/test_plot_experiment_comparison.py new file mode 100644 index 0000000..04e1dcc --- /dev/null +++ b/tests/test_plot_experiment_comparison.py @@ -0,0 +1,50 @@ +"""Unit tests for scripts.plot_experiment_comparison.""" + +import json +from pathlib import Path + +from scripts.plot_experiment_comparison import load_cases + + +class TestLoadCases: + def test_metrics_glob_uses_companion_run_params(self, tmp_path, monkeypatch): + monkeypatch.chdir(tmp_path) + out = Path("outputs") + out.mkdir() + metrics_path = out / "run_metrics_20260311_012202.json" + params_path = out / "run_params_20260311_012202.json" + metrics_path.write_text( + json.dumps( + { + "departure_time_variability": 12.0, + "route_choice_entropy": 0.5, + "average_hazard_exposure": {"global_average": 0.1}, + "average_travel_time": {"average": 42.0}, + "arrived_agents": 3, + "departed_agents": 4, + } + ), + encoding="utf-8", + ) + params_path.write_text( + json.dumps( + { + "scenario": "alert_guided", + "cognition": { + "info_sigma": 40.0, + "info_delay_s": 5.0, + "theta_trust": 0.7, + }, + } + ), + encoding="utf-8", + ) + + rows, source_path = load_cases(None, "outputs/run_metrics_*.json") + + assert source_path == metrics_path + assert len(rows) == 1 + assert rows[0]["scenario"] == "alert_guided" + assert rows[0]["info_sigma"] == 40.0 + assert rows[0]["info_delay_s"] == 5.0 + assert rows[0]["theta_trust"] == 0.7 diff --git a/tests/test_plot_run_metrics.py b/tests/test_plot_run_metrics.py new file mode 100644 index 0000000..fb5e21b --- /dev/null +++ b/tests/test_plot_run_metrics.py @@ -0,0 +1,124 @@ +"""Unit tests for scripts.plot_run_metrics.""" + +from scripts.plot_run_metrics import _briefing_summary, _kpi_specs, _plot_kpi_grid + + +class TestKpiSpecs: + def test_extracts_expected_values(self): + metrics = { + "departure_time_variability": 59.2975, + "route_choice_entropy": 0.686473, + "average_hazard_exposure": {"global_average": 0.0}, + "average_travel_time": {"average": 599.4855}, + } + specs = _kpi_specs(metrics) + values = {str(item["title"]): float(item["value"]) for item in specs} + assert values["Departure variance"] == 59.2975 + assert values["Route entropy"] == 0.686473 + assert values["Hazard exposure"] == 0.0 + assert values["Avg travel time"] == 599.4855 + + def test_missing_fields_default_to_zero(self): + specs = _kpi_specs({}) + assert all(float(item["value"]) == 0.0 for item in specs) + + +class TestPlotMetricsDashboard: + class _FakeAxis: + def __init__(self): + self.ylabel = None + self.ylim = None + self.title = None + self.text_calls = [] + + def bar(self, *args, **kwargs): + return None + + def set_title(self, value, **kwargs): + self.title = value + + def set_ylabel(self, value, **kwargs): + self.ylabel = value + + def set_xticks(self, *args, **kwargs): + return None + + def set_ylim(self, *args, **kwargs): + self.ylim = args + + def grid(self, *args, **kwargs): + return None + + def text(self, *args, **kwargs): + self.text_calls.append((args, kwargs)) + + class _FakeSubGrid: + def __getitem__(self, key): + return key + + class _FakeSlot: + def subgridspec(self, *args, **kwargs): + return TestPlotMetricsDashboard._FakeSubGrid() + + class _FakeFigure: + def __init__(self): + self.axes = [] + + def add_subplot(self, _slot): + ax = TestPlotMetricsDashboard._FakeAxis() + self.axes.append(ax) + return ax + + def test_plot_kpi_grid_creates_four_separate_panels(self): + metrics = { + "departure_time_variability": 59.2975, + "route_choice_entropy": 0.686473, + "average_hazard_exposure": {"global_average": 0.0}, + "average_travel_time": {"average": 599.4855}, + } + fig = self._FakeFigure() + slot = self._FakeSlot() + + _plot_kpi_grid(fig, slot, metrics) + + assert len(fig.axes) == 4 + assert [ax.title for ax in fig.axes] == [ + "Departure variance", + "Route entropy", + "Hazard exposure", + "Avg travel time", + ] + assert [ax.ylabel for ax in fig.axes] == [ + "Seconds^2", + "Entropy (nats)", + "Average risk score", + "Seconds", + ] + assert all(ax.ylim is not None for ax in fig.axes) + + +class TestBriefingSummary: + def test_formats_driver_briefing_thresholds(self): + summary = _briefing_summary( + { + "driver_briefing_thresholds": { + "margin_very_close_m": 100.0, + "margin_near_m": 300.0, + "margin_buffered_m": 700.0, + "risk_density_low": 0.12, + "risk_density_medium": 0.35, + "risk_density_high": 0.70, + "delay_fast_ratio": 1.1, + "delay_moderate_ratio": 1.3, + "delay_heavy_ratio": 1.6, + "caution_min_margin_m": 100.0, + "recommended_min_margin_m": 300.0, + } + } + ) + assert summary is not None + assert "Briefing thresholds:" in summary + assert "margin_m=100.0/300.0/700.0" in summary + + def test_returns_none_without_briefing_payload(self): + assert _briefing_summary({}) is None diff --git a/tests/test_run_parameters.py b/tests/test_run_parameters.py new file mode 100644 index 0000000..bdac106 --- /dev/null +++ b/tests/test_run_parameters.py @@ -0,0 +1,48 @@ +"""Unit tests for agentevac.utils.run_parameters.""" + +from pathlib import Path + +from agentevac.utils.run_parameters import ( + build_parameter_log_path, + companion_parameter_path, + reference_suffix, + write_run_parameter_log, +) + + +class TestReferenceSuffix: + def test_strips_known_metric_prefix(self): + assert reference_suffix("outputs/run_metrics_20260311_012202.json") == "20260311_012202" + + def test_preserves_case_id_prefixes(self): + assert ( + reference_suffix("outputs/experiments/metrics_sigma-40_delay-0_20260311_012202.json") + == "sigma-40_delay-0_20260311_012202" + ) + + +class TestBuildParameterLogPath: + def test_uses_reference_suffix_for_companion_names(self, tmp_path): + path = build_parameter_log_path( + str(tmp_path / "run_params.json"), + reference_path=tmp_path / "run_metrics_20260311_012202.json", + ) + assert Path(path).name == "run_params_20260311_012202.json" + + +class TestWriteRunParameterLog: + def test_writes_json_using_reference_suffix(self, tmp_path): + target = write_run_parameter_log( + str(tmp_path / "run_params.json"), + {"scenario": "advice_guided"}, + reference_path=tmp_path / "events_20260311_012202.jsonl", + ) + path = Path(target) + assert path.name == "run_params_20260311_012202.json" + assert path.read_text(encoding="utf-8").strip().startswith("{") + + +class TestCompanionParameterPath: + def test_matches_metrics_artifact_suffix(self): + candidate = companion_parameter_path(Path("outputs/run_metrics_20260311_012202.json")) + assert candidate == Path("outputs/run_params_20260311_012202.json")