diff --git a/scripts/validation/check_readiness_declared_actual.py b/scripts/validation/check_readiness_declared_actual.py index 8919f511..ef14244a 100644 --- a/scripts/validation/check_readiness_declared_actual.py +++ b/scripts/validation/check_readiness_declared_actual.py @@ -31,24 +31,35 @@ def _load_report(path: Path) -> dict[str, Any]: def _collect_mismatches(report: dict[str, Any]) -> list[dict[str, Any]]: mismatches: list[dict[str, Any]] = [] - domains = report.get("domains", []) - for domain in domains: - domain_name = domain.get("name", "unknown-domain") - for component in domain.get("components", []): + + def collect(scope_name: str, scope: dict[str, Any]) -> None: + domain_name = scope.get("domain") or scope.get("name") or "unknown-domain" + dataset_name = scope.get("id") or scope.get("name") or "unknown-dataset" + for component in scope.get("components", []): declared = component.get("declared_exists") actual = component.get("actual_exists") if declared is None or actual is None: continue - if bool(declared) != bool(actual): - mismatches.append( - { - "domain": domain_name, - "component": component.get("name", "unknown-component"), - "path": component.get("path", "unknown-path"), - "declared_exists": bool(declared), - "actual_exists": bool(actual), - } - ) + if bool(declared) == bool(actual): + continue + mismatches.append( + { + "scope": scope_name, + "dataset": dataset_name, + "domain": domain_name, + "component": component.get("name", "unknown-component"), + "path": component.get("path", "unknown-path"), + "declared_exists": bool(declared), + "actual_exists": bool(actual), + } + ) + + for dataset in report.get("datasets", []): + collect("datasets", dataset) + + for domain in report.get("domains", []): + collect("domains", domain) + return mismatches @@ -57,7 +68,8 @@ def _print_report(label: str, mismatches: list[dict[str, Any]]) -> None: for mismatch in mismatches: print( " - " - f"{mismatch['domain']} :: {mismatch['component']} ({mismatch['path']}): " + f"[{mismatch['scope']}] {mismatch['dataset']} / {mismatch['domain']} :: " + f"{mismatch['component']} ({mismatch['path']}): " f"declared_exists={mismatch['declared_exists']} vs " f"actual_exists={mismatch['actual_exists']}" ) diff --git a/tests/test_readiness_declared_actual_guard.py b/tests/test_readiness_declared_actual_guard.py new file mode 100644 index 00000000..a57e9c46 --- /dev/null +++ b/tests/test_readiness_declared_actual_guard.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +import importlib.util +from pathlib import Path + + +MODULE_PATH = Path("scripts/validation/check_readiness_declared_actual.py") + + +def _load_module(): + spec = importlib.util.spec_from_file_location("readiness_guard", MODULE_PATH) + assert spec is not None and spec.loader is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module + + +def test_collect_mismatches_detects_dataset_component_drift() -> None: + module = _load_module() + report = { + "datasets": [ + { + "id": "urban_heat", + "domain": "climate", + "components": [ + { + "name": "data", + "path": "data/socio_ecology/urban_heat/urban_heat_global.csv", + "declared_exists": False, + "actual_exists": True, + }, + { + "name": "metadata", + "path": "data/socio_ecology/urban_heat/urban_heat_global.metadata.json", + "declared_exists": True, + "actual_exists": True, + }, + ], + } + ] + } + + mismatches = module._collect_mismatches(report) + + assert len(mismatches) == 1 + mismatch = mismatches[0] + assert mismatch["dataset"] == "urban_heat" + assert mismatch["domain"] == "climate" + assert mismatch["component"] == "data" + assert mismatch["declared_exists"] is False + assert mismatch["actual_exists"] is True + + +def test_collect_mismatches_keeps_legacy_domain_shape_compatible() -> None: + module = _load_module() + report = { + "domains": [ + { + "name": "legacy-domain", + "components": [ + { + "name": "legacy-component", + "path": "legacy/path", + "declared_exists": True, + "actual_exists": False, + } + ], + } + ] + } + + mismatches = module._collect_mismatches(report) + + assert len(mismatches) == 1 + assert mismatches[0]["dataset"] == "legacy-domain" + assert mismatches[0]["domain"] == "legacy-domain"