diff --git a/apps/step_update/cli.py b/apps/step_update/cli.py index 5a111897340..f6ab72df82e 100644 --- a/apps/step_update/cli.py +++ b/apps/step_update/cli.py @@ -198,10 +198,21 @@ def _update_data_step( elif (folder / step_info["name"]).is_dir(): # Gather all relevant files from this folder. step_files = [file_name for file_name in list(folder.glob(f"{step_info['name']}/*")) if file_name.is_file()] + elif step_info["step_type"] == "export": + # Some export steps, such as YAML-only multidim charts, do not have a Python script. + step_files = [ + file_name + for file_name in list(folder.glob("*")) + if file_name.is_file() and str(file_name.stem).split(".")[0] in [step_info["name"], "shared"] + ] else: log.error(f"No step files found for step {step}.") return 1 + if not step_files: + log.error(f"No step files found for step {step}.") + return 1 + # Define the new step. if step_info["version"] == "latest": # If the step has version "latest", the new step will also have version "latest". diff --git a/dag/animal_welfare.yml b/dag/animal_welfare.yml index 728f0200317..3ca2ce05461 100644 --- a/dag/animal_welfare.yml +++ b/dag/animal_welfare.yml @@ -181,3 +181,8 @@ steps: - data://garden/animal_welfare/2026-04-28/global_hen_inventory data://grapher/animal_welfare/2026-04-28/eggs_and_hens_statistics: - data://garden/animal_welfare/2026-04-28/eggs_and_hens_statistics + # + # Single-chart collection replicating chart 7118 (https://admin.owid.io/admin/charts/7118/edit). + # + export://multidim/animal_welfare/latest/banning_of_chick_culling: + - data://grapher/animal_welfare/2026-04-16/chick_culling_laws diff --git a/etl/collection/chart_upsert.py b/etl/collection/chart_upsert.py new file mode 100644 index 00000000000..b9d0c474150 --- /dev/null +++ b/etl/collection/chart_upsert.py @@ -0,0 +1,121 @@ +"""Upsert a Collection with zero dimensions as a regular Grapher chart. + +When a multidim `Collection` has no dimensions and exactly one view, it is the degenerate +"single chart" case. Instead of pushing it as a multi-dim data page via the `/multi-dims/` +admin endpoint, we translate the view into a standard Grapher chart config and push it +via `AdminAPI.create_chart` / `AdminAPI.update_chart`. + +This keeps one authoring format (mdim config) across the chart↔multidim spectrum: adding a +dimension to a single-chart collection promotes it to a proper multidim without a config +migration. +""" + +from typing import TYPE_CHECKING, Any + +import structlog +from sqlalchemy.orm import Session +from sqlalchemy.orm.exc import NoResultFound + +from apps.chart_sync.admin_api import AdminAPI +from etl.collection.utils import map_indicator_path_to_id +from etl.config import DEFAULT_GRAPHER_SCHEMA, GRAPHER_USER_ID, OWIDEnv +from etl.grapher.model import Chart + +if TYPE_CHECKING: + from etl.collection.model.core import Collection + from etl.collection.model.view import View + +log = structlog.get_logger() + + +_AXIS_ORDER = ("y", "x", "size", "color") + + +def upsert_collection_as_chart(collection: "Collection", owid_env: OWIDEnv) -> int: + """Push a zero-dimension collection to Grapher as a regular chart. + + Expects `len(collection.dimensions) == 0` and `len(collection.views) == 1`. + """ + if len(collection.dimensions) != 0: + raise ValueError("upsert_collection_as_chart called on a collection with dimensions.") + if len(collection.views) != 1: + raise ValueError(f"Chart mode (no dimensions) requires exactly one view; got {len(collection.views)}.") + + view = collection.views[0] + slug = _resolve_chart_slug(collection, view) + config = _build_chart_config(view, slug) + + admin_api = AdminAPI(owid_env) + user_id = int(GRAPHER_USER_ID) if GRAPHER_USER_ID else None + + with Session(owid_env.engine) as session: + try: + existing = Chart.load_chart(session, slug=slug) + except NoResultFound: + existing = None + + if existing is not None: + # Preserve publication state unless the user explicitly overrode it. + config.setdefault("isPublished", existing.config.get("isPublished", False)) + log.info("collection.chart.update", slug=slug, chart_id=existing.id) + admin_api.update_chart(chart_id=existing.id, chart_config=config, user_id=user_id) + chart_id = existing.id + else: + # New charts default to unpublished so humans can review before go-live. + config.setdefault("isPublished", False) + log.info("collection.chart.create", slug=slug) + result = admin_api.create_chart(chart_config=config, user_id=user_id) + chart_id = result["chartId"] + + log.info( + "collection.chart.upsert_success", + slug=slug, + chart_id=chart_id, + admin_url=f"{owid_env.admin_site}/admin/charts/{chart_id}/edit", + ) + return chart_id + + +def _resolve_chart_slug(collection: "Collection", view: "View") -> str: + """Derive the chart slug from the collection's short_name. + + Grapher chart slugs are conventionally dash-separated; the mdim short_name is snake_case. + """ + del view # unused for now; kept in the signature for future explicit-slug overrides + return collection.short_name.replace("_", "-") + + +def _build_chart_config(view: "View", slug: str) -> dict[str, Any]: + """Translate `view.config` + `view.indicators` into a grapher chart config dict.""" + config: dict[str, Any] = dict(view.config or {}) + config["slug"] = slug + config.setdefault("$schema", DEFAULT_GRAPHER_SCHEMA) + + # Resolve indicator catalog paths (y/x/size/color) to variable IDs and emit as + # the grapher `dimensions` block, which charts identify by numeric variableId. + dimensions: list[dict[str, Any]] = [] + for axis in _AXIS_ORDER: + entries = _axis_entries(view, axis) + for indicator in entries: + dim: dict[str, Any] = {"property": axis, "variableId": int(map_indicator_path_to_id(indicator.catalogPath))} + if indicator.display: + dim["display"] = indicator.display + dimensions.append(dim) + if not dimensions: + raise ValueError(f"Chart view for slug '{slug}' has no indicators.") + config["dimensions"] = dimensions + + # Rewrite catalog-path references in `sortColumnSlug` and `map.columnSlug` to IDs. + if "sortColumnSlug" in config: + config["sortColumnSlug"] = str(map_indicator_path_to_id(config["sortColumnSlug"])) + if isinstance(config.get("map"), dict) and "columnSlug" in config["map"]: + config["map"]["columnSlug"] = str(map_indicator_path_to_id(config["map"]["columnSlug"])) + + return config + + +def _axis_entries(view: "View", axis: str) -> list: + value = getattr(view.indicators, axis, None) + if value is None: + return [] + return value if isinstance(value, list) else [value] diff --git a/etl/collection/model/core.py b/etl/collection/model/core.py index 04ce466acee..e705495bd95 100644 --- a/etl/collection/model/core.py +++ b/etl/collection/model/core.py @@ -251,6 +251,14 @@ def save( # ty: ignore[invalid-method-override] self.upsert_to_db(owid_env) def upsert_to_db(self, owid_env: OWIDEnv): + # Degenerate "single chart" case: no dimensions means no multi-dim page — push to the + # charts table via the chart admin endpoint instead of `/multi-dims/`. + if len(self.dimensions) == 0: + from etl.collection.chart_upsert import upsert_collection_as_chart + + upsert_collection_as_chart(self, owid_env) + return + # Replace especial fields URIs with IDs (e.g. sortColumnSlug). # TODO: I think we could move this to the Grapher side. config = replace_catalog_paths_with_ids(self.to_dict()) diff --git a/etl/steps/__init__.py b/etl/steps/__init__.py index 36646205ed8..74b04ced588 100644 --- a/etl/steps/__init__.py +++ b/etl/steps/__init__.py @@ -1187,6 +1187,13 @@ def __init__(self, path: str, dependencies: list[Step]) -> None: def __str__(self) -> str: return f"export://{self.path}" + def can_execute(self, archive_ok: bool = True) -> bool: + sp = self._search_path + if not archive_ok and "/archive/" in sp.as_posix(): + return False + + return super().can_execute(archive_ok=archive_ok) or self._is_multidim_yaml_only() + def run(self) -> None: # make sure the enclosing folder is there self._dest_dir.parent.mkdir(parents=True, exist_ok=True) @@ -1202,12 +1209,31 @@ def run(self) -> None: DataStep._run_py_isolated(self) # ty: ignore else: DataStep._run_py(self) # ty: ignore + elif self._is_multidim_yaml_only(): + # YAML-only multidim: no .py, just a .config.yml. Run the default + # boilerplate (load_collection_config → create_collection → save). + self._run_multidim_yaml_only(sp) # save checksum (only update index.json, don't call ds.save() which iterates # table_names and would pick up custom JSON files written by the export script) ds.metadata.source_checksum = self.checksum_input() ds.metadata.save(ds._index_file) + def _is_multidim_yaml_only(self) -> bool: + """True if this is an `export://multidim/...` step backed only by a `.config.yml`.""" + if not self.path.startswith("multidim/"): + return False + return self._search_path.with_suffix(".config.yml").exists() + + def _run_multidim_yaml_only(self, search_path: Path) -> None: + from etl.helpers import PathFinder + + # Synthesise the `.py` path PathFinder expects; the file doesn't need to exist, + # PathFinder only parses namespace/version/short_name out of the path components. + paths_ = PathFinder(str(search_path.with_suffix(".py"))) + collection = paths_.create_collection(config=paths_.load_collection_config()) + collection.save() + def checksum_output(self) -> str: # output checksum is checksum of all ingredients return self.checksum_input() diff --git a/etl/steps/export/multidim/animal_welfare/latest/banning_of_chick_culling.config.yml b/etl/steps/export/multidim/animal_welfare/latest/banning_of_chick_culling.config.yml new file mode 100644 index 00000000000..ab9bec999d9 --- /dev/null +++ b/etl/steps/export/multidim/animal_welfare/latest/banning_of_chick_culling.config.yml @@ -0,0 +1,38 @@ +# Single-chart collection: `dimensions` is empty, so this is pushed as a regular +# Grapher chart (not a multi-dim data page). Replicates chart: +# https://admin.owid.io/admin/charts/7118/edit +title: + title: "Which countries have banned chick culling?" + title_variant: "" +default_selection: + - "World" +topic_tags: + - "Animal Welfare" +dimensions: [] +views: + - dimensions: {} + indicators: + y: + - "chick_culling_laws#status" + config: + $schema: "https://files.ourworldindata.org/schemas/grapher-schema.009.json" + title: "Which countries have banned chick culling?" + subtitle: "Chick culling is the process of separating and killing unwanted male and unhealthy female chicks that cannot produce eggs in industrialized egg facilities." + note: "In Switzerland grinding is banned but gassing is still allowed. Belgium has only a regional ban." + originUrl: "/animal-welfare" + tab: "map" + hasMapTab: true + chartTypes: [] + yAxis: + min: "auto" + map: + hideTimeline: true + colorScale: + baseColorScheme: "BinaryMapPaletteA" + customNumericColorsActive: true + customCategoryColors: + Banned: "#4881c6" + "No laws": "#b6a28c" + "Not banned": "#ad9882" + "Partially banned": "#a084c1" + "Banned but not yet effective": "#3e94a1" diff --git a/etl/version_tracker.py b/etl/version_tracker.py index 814b17106e5..d8f646f47d7 100644 --- a/etl/version_tracker.py +++ b/etl/version_tracker.py @@ -500,15 +500,19 @@ def get_path_to_script(self, step: str, omit_base_dir: bool = False) -> Path | N log.error(f"Unknown channel {channel} for step {step}.") path_to_script_detected = None - # A step script can exist either as a .py file, as a .ipynb file, or a __init__.py file inside a folder. + # A step definition can exist either as a .py file, as a .ipynb file, or a __init__.py file inside a folder. # In the case of snapshots, there may or may not be a .py file, but there definitely needs to be a dvc file. # In that case, the corresponding script is not trivial to find, but at least we can return the dvc file. - for path_to_script_candidate in [ + path_to_script_candidates = [ path_to_script.with_suffix(".py"), # ty: ignore path_to_script.with_suffix(".ipynb"), # ty: ignore path_to_script / "__init__.py", # ty: ignore path_to_script.with_name(path_to_script.name + ".dvc"), # ty: ignore - ]: + ] + if step_type == "export" and channel == "multidim": + path_to_script_candidates.append(path_to_script.with_suffix(".config.yml")) # ty: ignore + + for path_to_script_candidate in path_to_script_candidates: if path_to_script_candidate.exists(): path_to_script_detected = path_to_script_candidate break diff --git a/schemas/multidim-schema.json b/schemas/multidim-schema.json index 976f2711cd8..0bd7007b67d 100644 --- a/schemas/multidim-schema.json +++ b/schemas/multidim-schema.json @@ -327,13 +327,11 @@ }, "dimensions": { "type": "array", + "minItems": 0, "title": "Collection dimensions", "description": "Array of dimension objects that define the filter dropdowns available to users.", "requirement_level": "required", "guidelines": [ - [ - "Must contain at least one dimension." - ], [ "Each dimension creates a dropdown filter in the user interface." ], @@ -342,6 +340,9 @@ ], [ "Should be ordered by importance or logical flow." + ], + [ + "May be an empty array (single-chart case): the collection is materialised as a regular Grapher chart instead of a multi-dimensional data page. In that case, `views` must contain exactly one view." ] ], "items": { diff --git a/tests/test_etl.py b/tests/test_etl.py index 320817e24ed..209a2e9a3f0 100644 --- a/tests/test_etl.py +++ b/tests/test_etl.py @@ -15,12 +15,12 @@ from etl.steps import DataStep, Step, compile_steps, filter_to_subgraph -def test_all_data_steps_have_code(): +def test_all_data_steps_can_execute(): # find all data steps steps = [s for s in get_all_steps() if isinstance(s, DataStep)] for s in steps: - assert s.can_execute(), f'no code found for step "data://{s.path}"' + assert s.can_execute(), f'no executable step definition found for "{s}"' def test_sub_dag_import(): diff --git a/vscode_extensions/chart-preview/install/archived/chart-preview-0.0.1.vsix b/vscode_extensions/chart-preview/install/archived/chart-preview-0.0.1.vsix new file mode 100644 index 00000000000..7a244378a28 Binary files /dev/null and b/vscode_extensions/chart-preview/install/archived/chart-preview-0.0.1.vsix differ diff --git a/vscode_extensions/chart-preview/install/chart-preview-0.0.2.vsix b/vscode_extensions/chart-preview/install/chart-preview-0.0.2.vsix new file mode 100644 index 00000000000..8fff6bd3498 Binary files /dev/null and b/vscode_extensions/chart-preview/install/chart-preview-0.0.2.vsix differ diff --git a/vscode_extensions/chart-preview/package-lock.json b/vscode_extensions/chart-preview/package-lock.json index 7f4da51866f..cf202b39f8b 100644 --- a/vscode_extensions/chart-preview/package-lock.json +++ b/vscode_extensions/chart-preview/package-lock.json @@ -1,12 +1,12 @@ { "name": "chart-preview", - "version": "0.0.1", + "version": "0.0.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "chart-preview", - "version": "0.0.1", + "version": "0.0.2", "devDependencies": { "@types/node": "20.x", "@types/vscode": "^1.100.0", diff --git a/vscode_extensions/chart-preview/package.json b/vscode_extensions/chart-preview/package.json index 61da2365d1e..24992d9ecd5 100644 --- a/vscode_extensions/chart-preview/package.json +++ b/vscode_extensions/chart-preview/package.json @@ -3,7 +3,7 @@ "publisher": "owid", "displayName": "Chart Preview", "description": "Live preview panel for .chart.yml files using OWID Grapher", - "version": "0.0.1", + "version": "0.0.2", "repository": { "type": "git", "url": "https://github.com/owid/etl" diff --git a/vscode_extensions/chart-preview/src/extension.ts b/vscode_extensions/chart-preview/src/extension.ts index e5b0db7d1cd..fd5d9887a11 100644 --- a/vscode_extensions/chart-preview/src/extension.ts +++ b/vscode_extensions/chart-preview/src/extension.ts @@ -136,17 +136,55 @@ async function parseChartYml(filePath: string, wsRoot: string): Promise<{ stepUr } /** - * Extract export step URI and catalog path from an export/multidim file path. + * Extract export step URI / catalog path / chart info from an export/multidim file path. * Supports both .config.yml and .py files. * Catalog path defaults to namespace/version/name#name (matching PathFinder.create_collection). + * + * For collections with `dimensions: []` (single-chart case) the ETL pushes to the chart + * admin endpoint, not the multi-dim one. New charts are created as unpublished drafts, + * so preview them through the admin Grapher route (`admin/grapher/{slug}`), which can + * render unpublished charts. + * + * We only classify as "chart" when the YAML has `dimensions: []` AND there is no sibling + * `.py` file. A sibling `.py` can populate dimensions programmatically (e.g. the + * air_pollution step uses `dimensions: []` as a placeholder), so the empty-list alone + * isn't a reliable signal. */ -function parseExportMultidim(filePath: string, wsRoot: string): { stepUri: string; catalogPath: string } { +async function parseExportMultidim( + filePath: string, + wsRoot: string, +): Promise<{ stepUri: string; catalogPath: string; isChart: boolean; chartSlug: string }> { const exportDir = path.join(wsRoot, 'etl', 'steps', 'export', 'multidim'); const rel = path.relative(exportDir, filePath); const stepPath = rel.replace(/\.(config\.yml|py)$/, ''); const shortName = path.basename(stepPath); const catalogPath = `${stepPath}#${shortName}`; - return { stepUri: `export://multidim/${stepPath}`, catalogPath }; + // Grapher slugs are dash-separated; the ETL short_name is snake_case. + const chartSlug = shortName.replace(/_/g, '-'); + + const configPath = filePath.endsWith('.config.yml') + ? filePath + : filePath.replace(/\.py$/, '.config.yml'); + const pyPath = configPath.replace(/\.config\.yml$/, '.py'); + + let hasEmptyDimensions = false; + try { + const content = await readFile(configPath, 'utf8'); + hasEmptyDimensions = /^dimensions:\s*\[\s*\]\s*(#.*)?$/m.test(content); + } catch { + // No sibling .config.yml — can't classify, fall back to mdim. + } + + let hasPy = false; + try { + await readFile(pyPath, 'utf8'); + hasPy = true; + } catch { + // No sibling .py — fully-declarative step. + } + + const isChart = hasEmptyDimensions && !hasPy; + return { stepUri: `export://multidim/${stepPath}`, catalogPath, isChart, chartSlug }; } /** @@ -263,10 +301,17 @@ const chartStrategy: PreviewStrategy = { let fileName: string; if (filePath.includes('/export/multidim/')) { - const parsed = parseExportMultidim(filePath, wsRoot); + const parsed = await parseExportMultidim(filePath, wsRoot); stepUri = parsed.stepUri; - stagingUrl = `http://${containerName}/admin/grapher/${encodeURIComponent(parsed.catalogPath)}`; - isMdim = true; + if (parsed.isChart) { + // Zero-dim collection → ETL pushed a regular chart. Use the admin preview + // route so newly-created unpublished charts render too. + stagingUrl = `http://${containerName}/admin/grapher/${parsed.chartSlug}`; + isMdim = true; + } else { + stagingUrl = `http://${containerName}/admin/grapher/${encodeURIComponent(parsed.catalogPath)}`; + isMdim = true; + } etlArgs = [stepUri, '--export', '--watch', '--private']; fileName = path.basename(filePath).replace(/\.(config\.yml|py)$/, ''); } else { diff --git a/vscode_extensions/clickable-dag-steps/CHANGELOG.md b/vscode_extensions/clickable-dag-steps/CHANGELOG.md index 5cfb048f9b8..83880079d1d 100644 --- a/vscode_extensions/clickable-dag-steps/CHANGELOG.md +++ b/vscode_extensions/clickable-dag-steps/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## [0.0.5] – 2026-05-11 +- Support YAML-only export steps by linking DAG entries to adjacent `.config.yml` files when no `.py` file exists (carried over after merging master). + +## [0.0.4] – 2026-04-27 +- Extract URI-line classifier as `classifyDagLine` so the heuristic and any future refactor share one definition; add a focused test suite. + ## [0.0.3] – 2025-05-27 - Improved decoration system with clearer rules for active vs archive DAG files. - Enhanced error indicators (❌❗❓⚠️) to provide more detailed information. diff --git a/vscode_extensions/clickable-dag-steps/install/clickable-dag-steps-0.0.5.vsix b/vscode_extensions/clickable-dag-steps/install/clickable-dag-steps-0.0.5.vsix new file mode 100644 index 00000000000..ecec21a8318 Binary files /dev/null and b/vscode_extensions/clickable-dag-steps/install/clickable-dag-steps-0.0.5.vsix differ diff --git a/vscode_extensions/clickable-dag-steps/package-lock.json b/vscode_extensions/clickable-dag-steps/package-lock.json index 275ae1a81b8..06491e6d005 100644 --- a/vscode_extensions/clickable-dag-steps/package-lock.json +++ b/vscode_extensions/clickable-dag-steps/package-lock.json @@ -1,12 +1,12 @@ { "name": "clickable-dag-steps", - "version": "0.0.4", + "version": "0.0.5", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "clickable-dag-steps", - "version": "0.0.4", + "version": "0.0.5", "devDependencies": { "@types/js-yaml": "^4.0.9", "@types/mocha": "^10.0.8", diff --git a/vscode_extensions/clickable-dag-steps/package.json b/vscode_extensions/clickable-dag-steps/package.json index 7af9533c025..30f83d1415b 100644 --- a/vscode_extensions/clickable-dag-steps/package.json +++ b/vscode_extensions/clickable-dag-steps/package.json @@ -4,7 +4,7 @@ "publisher": "owid", "displayName": "Clickable DAG Steps", "description": "Make steps in ETL DAG clickable", - "version": "0.0.4", + "version": "0.0.5", "engines": { "vscode": "^1.100.0" }, diff --git a/vscode_extensions/clickable-dag-steps/src/extension.ts b/vscode_extensions/clickable-dag-steps/src/extension.ts index eb9bf6aea24..f66c6b58c84 100644 --- a/vscode_extensions/clickable-dag-steps/src/extension.ts +++ b/vscode_extensions/clickable-dag-steps/src/extension.ts @@ -72,8 +72,9 @@ function parseStepUri(uri: string): { scheme: string; key: string; version: stri const dir = path.join(base, channel, namespace, version); const filePaths = [ path.join(dir, shortName + '.py'), + ...(scheme === 'export' ? [path.join(dir, shortName + '.config.yml')] : []), path.join(dir, shortName, '__init__.py'), - path.join(dir, shortName + '.ipynb') + path.join(dir, shortName + '.ipynb'), ]; return { scheme, key, version, fullKey, filePaths }; }