Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions apps/step_update/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,10 +198,21 @@ def _update_data_step(
elif (folder / step_info["name"]).is_dir():
# Gather all relevant files from this folder.
step_files = [file_name for file_name in list(folder.glob(f"{step_info['name']}/*")) if file_name.is_file()]
elif step_info["step_type"] == "export":
# Some export steps, such as YAML-only multidim charts, do not have a Python script.
step_files = [
file_name
for file_name in list(folder.glob("*"))
if file_name.is_file() and str(file_name.stem).split(".")[0] in [step_info["name"], "shared"]
]
else:
log.error(f"No step files found for step {step}.")
return 1

if not step_files:
log.error(f"No step files found for step {step}.")
return 1

# Define the new step.
if step_info["version"] == "latest":
# If the step has version "latest", the new step will also have version "latest".
Expand Down
5 changes: 5 additions & 0 deletions dag/animal_welfare.yml
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,8 @@ steps:
- data://garden/animal_welfare/2026-04-28/global_hen_inventory
data://grapher/animal_welfare/2026-04-28/eggs_and_hens_statistics:
- data://garden/animal_welfare/2026-04-28/eggs_and_hens_statistics
#
# Single-chart collection replicating chart 7118 (https://admin.owid.io/admin/charts/7118/edit).
#
export://multidim/animal_welfare/latest/banning_of_chick_culling:
- data://grapher/animal_welfare/2026-04-16/chick_culling_laws
121 changes: 121 additions & 0 deletions etl/collection/chart_upsert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""Upsert a Collection with zero dimensions as a regular Grapher chart.

When a multidim `Collection` has no dimensions and exactly one view, it is the degenerate
"single chart" case. Instead of pushing it as a multi-dim data page via the `/multi-dims/`
admin endpoint, we translate the view into a standard Grapher chart config and push it
via `AdminAPI.create_chart` / `AdminAPI.update_chart`.

This keeps one authoring format (mdim config) across the chart↔multidim spectrum: adding a
dimension to a single-chart collection promotes it to a proper multidim without a config
migration.
"""

from typing import TYPE_CHECKING, Any

import structlog
from sqlalchemy.orm import Session
from sqlalchemy.orm.exc import NoResultFound

from apps.chart_sync.admin_api import AdminAPI
from etl.collection.utils import map_indicator_path_to_id
from etl.config import DEFAULT_GRAPHER_SCHEMA, GRAPHER_USER_ID, OWIDEnv
from etl.grapher.model import Chart

if TYPE_CHECKING:
from etl.collection.model.core import Collection
from etl.collection.model.view import View

log = structlog.get_logger()


_AXIS_ORDER = ("y", "x", "size", "color")


def upsert_collection_as_chart(collection: "Collection", owid_env: OWIDEnv) -> int:
"""Push a zero-dimension collection to Grapher as a regular chart.

Expects `len(collection.dimensions) == 0` and `len(collection.views) == 1`.
"""
if len(collection.dimensions) != 0:
raise ValueError("upsert_collection_as_chart called on a collection with dimensions.")
if len(collection.views) != 1:
raise ValueError(f"Chart mode (no dimensions) requires exactly one view; got {len(collection.views)}.")

view = collection.views[0]
slug = _resolve_chart_slug(collection, view)
config = _build_chart_config(view, slug)
Comment thread
pabloarosado marked this conversation as resolved.

admin_api = AdminAPI(owid_env)
user_id = int(GRAPHER_USER_ID) if GRAPHER_USER_ID else None

with Session(owid_env.engine) as session:
try:
existing = Chart.load_chart(session, slug=slug)
except NoResultFound:
existing = None

if existing is not None:
# Preserve publication state unless the user explicitly overrode it.
config.setdefault("isPublished", existing.config.get("isPublished", False))
log.info("collection.chart.update", slug=slug, chart_id=existing.id)
admin_api.update_chart(chart_id=existing.id, chart_config=config, user_id=user_id)
chart_id = existing.id
Comment on lines +57 to +62

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, if a chart already exists in DB, we can just overwrite it via ETL? Am I understanding it correctly?

Couldn't this cause unwanted overwrites? Say the chart exists in DB and is live (i.e., this if evaluates to True). If the config in ETL differs from that in the DB (because someone has edited it in either ETL or the DB), wouldn't the DB version always be overwritten?

I see that on staging, one can reject changes in the Chart diff (I suppose), but the ETL config still appears and may have been merged into master. Then, in master, if etl runs, the DB chart would be overwritten, no?

I'm just not sure I understand this logic. At the moment, it feels a bit dangerous, but I could be misunderstanding it.

else:
# New charts default to unpublished so humans can review before go-live.
config.setdefault("isPublished", False)
log.info("collection.chart.create", slug=slug)
result = admin_api.create_chart(chart_config=config, user_id=user_id)
chart_id = result["chartId"]

log.info(
"collection.chart.upsert_success",
slug=slug,
chart_id=chart_id,
admin_url=f"{owid_env.admin_site}/admin/charts/{chart_id}/edit",
)
return chart_id


def _resolve_chart_slug(collection: "Collection", view: "View") -> str:
"""Derive the chart slug from the collection's short_name.

Grapher chart slugs are conventionally dash-separated; the mdim short_name is snake_case.
"""
del view # unused for now; kept in the signature for future explicit-slug overrides
Comment thread
pabloarosado marked this conversation as resolved.
return collection.short_name.replace("_", "-")


def _build_chart_config(view: "View", slug: str) -> dict[str, Any]:
    """Translate `view.config` + `view.indicators` into a grapher chart config dict.

    The result is built on a copy of `view.config` (its top level, plus the `map` block
    when that block is rewritten), so building the config does not modify the view.

    Args:
        view: The view supplying the base chart config and the indicators.
        slug: Chart slug, written to the `slug` key of the config.

    Returns:
        The chart config with `slug` and `dimensions` set, `$schema` defaulted when
        absent, and the catalog paths in `sortColumnSlug` / `map.columnSlug` replaced
        by variable IDs (as strings).

    Raises:
        ValueError: If the view has no indicators on any of the y/x/size/color axes.
    """
    config: dict[str, Any] = dict(view.config or {})
    config["slug"] = slug
    config.setdefault("$schema", DEFAULT_GRAPHER_SCHEMA)

    # Resolve indicator catalog paths (y/x/size/color) to variable IDs and emit as
    # the grapher `dimensions` block, which charts identify by numeric variableId.
    dimensions: list[dict[str, Any]] = []
    for axis in _AXIS_ORDER:
        for indicator in _axis_entries(view, axis):
            dim: dict[str, Any] = {
                "property": axis,
                "variableId": int(map_indicator_path_to_id(indicator.catalogPath)),
            }
            if indicator.display:
                dim["display"] = indicator.display
            dimensions.append(dim)
    if not dimensions:
        raise ValueError(f"Chart view for slug '{slug}' has no indicators.")
    config["dimensions"] = dimensions

    # Rewrite catalog-path references in `sortColumnSlug` and `map.columnSlug` to IDs.
    if "sortColumnSlug" in config:
        config["sortColumnSlug"] = str(map_indicator_path_to_id(config["sortColumnSlug"]))

    map_config = config.get("map")
    if isinstance(map_config, dict) and "columnSlug" in map_config:
        # `dict(view.config)` above is only a shallow copy, so `map_config` is still the
        # view's own nested dict. Assign a rewritten copy instead of mutating it in place;
        # otherwise the view would be left holding an ID where its catalog path was.
        config["map"] = {
            **map_config,
            "columnSlug": str(map_indicator_path_to_id(map_config["columnSlug"])),
        }

    return config


def _axis_entries(view: "View", axis: str) -> list:
value = getattr(view.indicators, axis, None)
if value is None:
return []
return value if isinstance(value, list) else [value]
8 changes: 8 additions & 0 deletions etl/collection/model/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,14 @@ def save( # ty: ignore[invalid-method-override]
self.upsert_to_db(owid_env)

def upsert_to_db(self, owid_env: OWIDEnv):
# Degenerate "single chart" case: no dimensions means no multi-dim page — push to the
# charts table via the chart admin endpoint instead of `/multi-dims/`.
if len(self.dimensions) == 0:
from etl.collection.chart_upsert import upsert_collection_as_chart

upsert_collection_as_chart(self, owid_env)
return

# Replace special field URIs with IDs (e.g. sortColumnSlug).
# TODO: I think we could move this to the Grapher side.
config = replace_catalog_paths_with_ids(self.to_dict())
Expand Down
26 changes: 26 additions & 0 deletions etl/steps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1187,6 +1187,13 @@ def __init__(self, path: str, dependencies: list[Step]) -> None:
def __str__(self) -> str:
return f"export://{self.path}"

def can_execute(self, archive_ok: bool = True) -> bool:
    """Tell whether this step has something to run.

    A step whose search path contains `/archive/` is rejected when `archive_ok` is
    false. Otherwise the step is executable if the parent class says so, or if it is
    a multidim step that has a `.config.yml` (see `_is_multidim_yaml_only`).
    """
    search_path = self._search_path
    if archive_ok or "/archive/" not in search_path.as_posix():
        return super().can_execute(archive_ok=archive_ok) or self._is_multidim_yaml_only()
    return False

def run(self) -> None:
# make sure the enclosing folder is there
self._dest_dir.parent.mkdir(parents=True, exist_ok=True)
Expand All @@ -1202,12 +1209,31 @@ def run(self) -> None:
DataStep._run_py_isolated(self) # ty: ignore
else:
DataStep._run_py(self) # ty: ignore
elif self._is_multidim_yaml_only():
# YAML-only multidim: no .py, just a .config.yml. Run the default
# boilerplate (load_collection_config → create_collection → save).
self._run_multidim_yaml_only(sp)

# save checksum (only update index.json, don't call ds.save() which iterates
# table_names and would pick up custom JSON files written by the export script)
ds.metadata.source_checksum = self.checksum_input()
ds.metadata.save(ds._index_file)

def _is_multidim_yaml_only(self) -> bool:
"""True if this is an `export://multidim/...` step backed only by a `.config.yml`."""
if not self.path.startswith("multidim/"):
return False
return self._search_path.with_suffix(".config.yml").exists()

def _run_multidim_yaml_only(self, search_path: Path) -> None:
    """Run the default multidim boilerplate for a step that has no `.py` script.

    Loads the collection config, creates the collection from it and saves it.
    """
    from etl.helpers import PathFinder

    # PathFinder expects the path of a `.py` script, so synthesise one. The file doesn't
    # need to exist: PathFinder only parses namespace/version/short_name out of the path
    # components.
    synthetic_script = search_path.with_suffix(".py")
    finder = PathFinder(str(synthetic_script))
    collection_config = finder.load_collection_config()
    finder.create_collection(config=collection_config).save()

def checksum_output(self) -> str:
# output checksum is checksum of all ingredients
return self.checksum_input()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Single-chart collection: `dimensions` is empty, so this is pushed as a regular
# Grapher chart (not a multi-dim data page). Replicates chart:
# https://admin.owid.io/admin/charts/7118/edit
title:
title: "Which countries have banned chick culling?"
title_variant: ""
default_selection:
- "World"
topic_tags:
- "Animal Welfare"
dimensions: []
views:
- dimensions: {}
indicators:
y:
- "chick_culling_laws#status"
config:
$schema: "https://files.ourworldindata.org/schemas/grapher-schema.009.json"
title: "Which countries have banned chick culling?"
subtitle: "Chick culling is the process of separating and killing unwanted male and unhealthy female chicks that cannot produce eggs in industrialized egg facilities."
note: "In Switzerland grinding is banned but gassing is still allowed. Belgium has only a regional ban."
originUrl: "/animal-welfare"
tab: "map"
hasMapTab: true
chartTypes: []
yAxis:
min: "auto"
map:
hideTimeline: true
colorScale:
baseColorScheme: "BinaryMapPaletteA"
customNumericColorsActive: true
customCategoryColors:
Banned: "#4881c6"
"No laws": "#b6a28c"
"Not banned": "#ad9882"
"Partially banned": "#a084c1"
"Banned but not yet effective": "#3e94a1"
10 changes: 7 additions & 3 deletions etl/version_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -500,15 +500,19 @@ def get_path_to_script(self, step: str, omit_base_dir: bool = False) -> Path | N
log.error(f"Unknown channel {channel} for step {step}.")

path_to_script_detected = None
# A step script can exist either as a .py file, as a .ipynb file, or a __init__.py file inside a folder.
# A step definition can exist either as a .py file, as a .ipynb file, or a __init__.py file inside a folder.
# In the case of snapshots, there may or may not be a .py file, but there definitely needs to be a dvc file.
# In that case, the corresponding script is not trivial to find, but at least we can return the dvc file.
for path_to_script_candidate in [
path_to_script_candidates = [
path_to_script.with_suffix(".py"), # ty: ignore
path_to_script.with_suffix(".ipynb"), # ty: ignore
path_to_script / "__init__.py", # ty: ignore
path_to_script.with_name(path_to_script.name + ".dvc"), # ty: ignore
]:
]
if step_type == "export" and channel == "multidim":
path_to_script_candidates.append(path_to_script.with_suffix(".config.yml")) # ty: ignore

for path_to_script_candidate in path_to_script_candidates:
if path_to_script_candidate.exists():
path_to_script_detected = path_to_script_candidate
break
Expand Down
7 changes: 4 additions & 3 deletions schemas/multidim-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -327,13 +327,11 @@
},
"dimensions": {
"type": "array",
"minItems": 0,
"title": "Collection dimensions",
"description": "Array of dimension objects that define the filter dropdowns available to users.",
"requirement_level": "required",
"guidelines": [
[
"Must contain at least one dimension."
],
[
"Each dimension creates a dropdown filter in the user interface."
],
Expand All @@ -342,6 +340,9 @@
],
[
"Should be ordered by importance or logical flow."
],
[
"May be an empty array (single-chart case): the collection is materialised as a regular Grapher chart instead of a multi-dimensional data page. In that case, `views` must contain exactly one view."
]
],
"items": {
Expand Down
4 changes: 2 additions & 2 deletions tests/test_etl.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
from etl.steps import DataStep, Step, compile_steps, filter_to_subgraph


def test_all_data_steps_have_code():
def test_all_data_steps_can_execute():
# find all data steps
steps = [s for s in get_all_steps() if isinstance(s, DataStep)]

for s in steps:
assert s.can_execute(), f'no code found for step "data://{s.path}"'
assert s.can_execute(), f'no executable step definition found for "{s}"'


def test_sub_dag_import():
Expand Down
Binary file not shown.
Binary file not shown.
4 changes: 2 additions & 2 deletions vscode_extensions/chart-preview/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion vscode_extensions/chart-preview/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"publisher": "owid",
"displayName": "Chart Preview",
"description": "Live preview panel for .chart.yml files using OWID Grapher",
"version": "0.0.1",
"version": "0.0.2",
"repository": {
"type": "git",
"url": "https://github.com/owid/etl"
Expand Down
Loading
Loading