Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class Experiments:
name="mini",
type=utils.ExperimentType.FULL_MUPHYS,
uri="https://polybox.ethz.ch/index.php/s/F8bK2C8tkpf8Xy2/download?files=mini.tar.gz",
known_hash=None, # TODO(msimberg):
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,19 @@ class Experiments:
name="mini",
type=utils.ExperimentType.GRAUPEL_ONLY,
uri="https://polybox.ethz.ch/index.php/s/7B9MWyKTTBrNQBd/download?files=mini.tar.gz",
known_hash=None, # TODO(msimberg):
)
TINY: Final = utils.MuphysExperiment(
name="tiny",
type=utils.ExperimentType.GRAUPEL_ONLY,
uri="https://polybox.ethz.ch/index.php/s/7B9MWyKTTBrNQBd/download?files=tiny.tar.gz",
known_hash=None, # TODO(msimberg):
)
R2B05: Final = utils.MuphysExperiment(
name="R2B05",
type=utils.ExperimentType.GRAUPEL_ONLY,
uri="https://polybox.ethz.ch/index.php/s/7B9MWyKTTBrNQBd/download?files=R2B05.tar.gz",
known_hash=None, # TODO(msimberg):
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ class MuphysExperiment:
name: str
type: ExperimentType
uri: str
known_hash: str | None
dt: float = 30.0
qnc: float = 100.0

Expand All @@ -49,4 +50,8 @@ def __str__(self):
@pytest.fixture(autouse=True)
def download_test_data(experiment: MuphysExperiment) -> None:
"""Downloads test data for an experiment (implicit fixture)."""
data_handling.download_test_data(_path_to_experiment_testdata(experiment), uri=experiment.uri)
data_handling.download_test_data(
_path_to_experiment_testdata(experiment),
uri=experiment.uri,
known_hash=experiment.known_hash,
)
4 changes: 2 additions & 2 deletions model/testing/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ dependencies = [
"gt4py==1.1.8",
"numpy>=1.23.3",
'packaging>=20.0',
"pooch>=1.7.0",
"pytest>=8.0.1",
"serialbox4py>=2.6.2",
"typing-extensions>=4.11.0",
"wget>=3.2"
"typing-extensions>=4.11.0"
]
description = "Testing utils for the icon4py model."
license = {text = "BSD-3 License"}
Expand Down
37 changes: 15 additions & 22 deletions model/testing/src/icon4py/model/testing/data_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,25 +11,27 @@
import os
import pathlib
import shutil
import tarfile
import tempfile

import pooch

from icon4py.model.testing import config, locking


def download_and_extract(
uri: str,
dst: pathlib.Path,
known_hash: str | None,
) -> None:
"""
Download and extract a tar file with locking.

Args:
uri: download url for archived data
dst: the archive is extracted at this path
known_hash: expected hash of the archive for integrity verification,
or None to skip verification

Downloads to a temporary directory in the destination directory
(not /tmp to avoid space constraints).
Uses pooch for downloading and archive extraction.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does anyone know why we avoid /tmp? If we want to keep avoiding it, then I think this docstring should still mention /tmp. Otherwise I'd prefer to move to /tmp, or more correctly to $TMPDIR (is that a thing on macOS?), possibly with an override through ICON4PY_TMP_DATA_PATH or something like that.

Side note: I'd also like to see ICON4PY_TEST_DATA_PATH default to something in ~/.cache/icon4py (or $XDG_CACHE_HOME/icon4py), but I don't know if others agree. I already set ICON4PY_TEST_DATA_PATH to ~/.cache/icon4py to share the cache between different icon4py worktrees.

This can also be out of scope for this PR.

"""
dst.mkdir(parents=True, exist_ok=True)

Expand All @@ -47,28 +49,19 @@ def download_and_extract(
item.unlink()
elif item.is_dir():
shutil.rmtree(item)
_perform_download(uri, dst)
pooch.retrieve(
url=uri,
known_hash=known_hash,
path=str(dst),
fname="archive.tar.gz",
processor=pooch.Untar(extract_dir="."),
)
completion_marker.touch()


def _perform_download(uri: str, dst: pathlib.Path) -> None:
try:
import wget # type: ignore[import-untyped]
except ImportError as err:
raise RuntimeError(f"To download data file from {uri}, please install `wget`") from err

with tempfile.TemporaryDirectory(dir=dst) as temp_dir:
temp_path = pathlib.Path(temp_dir) / "download.tar.gz"
wget.download(uri, out=str(temp_path))
if not tarfile.is_tarfile(temp_path):
raise OSError(f"{temp_path} needs to be a valid tar file")
with tarfile.open(temp_path, mode="r:*") as tf:
tf.extractall(path=dst)


def download_test_data(dst: pathlib.Path, uri: str) -> None:
def download_test_data(dst: pathlib.Path, uri: str, known_hash: str | None) -> None:
if config.ENABLE_TESTDATA_DOWNLOAD:
download_and_extract(uri, dst)
download_and_extract(uri, dst, known_hash=known_hash)
else:
# If test data download is disabled, we check if the directory exists
# and isn't empty without locking. We assume the location is managed by the user
Expand Down
19 changes: 19 additions & 0 deletions model/testing/src/icon4py/model/testing/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class GridDescription:
params: icon_grid.GlobalGridParams
file_name: str
uri: str
known_hash: str | None


class Grids:
Expand All @@ -58,6 +59,7 @@ class Grids:
),
file_name="icon_grid_R01B01.nc",
uri="https://polybox.ethz.ch/index.php/s/9M5JX4LJr3LGPqz/download",
known_hash=None, # TODO(msimberg):
)
R02B04_GLOBAL: Final = GridDescription(
name="r02b04_global",
Expand All @@ -73,6 +75,7 @@ class Grids:
),
file_name="icon_grid_0013_R02B04_R.nc",
uri="https://polybox.ethz.ch/index.php/s/BRiF7XrCCpGqpEF/download",
known_hash=None, # TODO(msimberg):
)
R02B06_GLOBAL: Final = GridDescription(
name="r02b06_global",
Expand All @@ -88,6 +91,7 @@ class Grids:
),
file_name="icon_grid_0021_R02B06_G.nc",
uri="https://polybox.ethz.ch/index.php/s/WsHr5e2MKpHkkmp/download",
known_hash=None, # TODO(msimberg):
)
R02B07_GLOBAL: Final = GridDescription(
name="r02b07_global",
Expand All @@ -103,6 +107,7 @@ class Grids:
),
file_name="icon_grid_0023_R02B07_G.nc",
uri="https://polybox.ethz.ch/index.php/s/RMqNbaeHLD5tDd6/download",
known_hash=None, # TODO(msimberg):
)
R19_B07_MCH_LOCAL: Final = GridDescription(
name="mch_opr_r19b07_icon_ch2",
Expand All @@ -119,6 +124,7 @@ class Grids:
),
file_name="icon_grid_0002_R19B07_mch.nc",
uri="https://polybox.ethz.ch/index.php/s/tFQian4aDzTES6c/download",
known_hash=None, # TODO(msimberg):
)
MCH_OPR_R04B07_DOMAIN01: Final = GridDescription(
name="mch_opr_r4b7",
Expand All @@ -135,6 +141,7 @@ class Grids:
),
file_name="mch_opr_r4b7_DOM01.nc",
uri="https://polybox.ethz.ch/index.php/s/ZL7LeEDijGCSJGz/download",
known_hash=None, # TODO(msimberg):
)
MCH_OPR_R19B08_DOMAIN01: Final = GridDescription(
name="mch_opr_r19b08",
Expand All @@ -151,6 +158,7 @@ class Grids:
),
file_name="domain1_DOM01.nc",
uri="https://polybox.ethz.ch/index.php/s/P6XfWcYjnrsNmeX/download",
known_hash=None, # TODO(msimberg):
)
MCH_CH_R04B09_DSL: Final = GridDescription(
name="mch_ch_r04b09_dsl",
Expand All @@ -167,6 +175,7 @@ class Grids:
),
file_name="grid.nc",
uri="https://polybox.ethz.ch/index.php/s/hD232znfEPBh4Oh/download",
known_hash=None, # TODO(msimberg):
)
TORUS_100X116_1000M: Final = GridDescription(
name="torus_100x116_res1000",
Expand All @@ -181,6 +190,7 @@ class Grids:
),
file_name="Torus_Triangles_100x116_1000m.nc",
uri="https://polybox.ethz.ch/index.php/s/yqvotFss9i1OKzs/download",
known_hash=None, # TODO(msimberg):
)
TORUS_50000x5000: Final = GridDescription(
name="torus_50000x5000_res500",
Expand All @@ -195,6 +205,7 @@ class Grids:
),
file_name="Torus_Triangles_50000m_x_5000m_res500m.nc",
uri="https://polybox.ethz.ch/index.php/s/eclzK00TM9nnLtE/download",
known_hash=None, # TODO(msimberg):
)
TORUS_1000X1000_250M: Final = GridDescription(
name="torus_1000x1000_res250",
Expand All @@ -209,6 +220,7 @@ class Grids:
),
file_name="Torus_Triangles_1000m_x_1000m_res250m.nc",
uri="https://polybox.ethz.ch/index.php/s/eMDbDbdmKLkDiwp/download",
known_hash=None, # TODO(msimberg):
)


Expand All @@ -226,6 +238,8 @@ class Experiment:
description: str
grid: GridDescription
num_levels: int
# Known hashes of the serialized data archives, keyed by communicator size (1, 2, 4).
known_hashes: dict[int, str | None]
version: int = 3


Expand All @@ -235,30 +249,35 @@ class Experiments:
description="EXCLAIM Aquaplanet experiment",
grid=Grids.R02B04_GLOBAL,
num_levels=60,
known_hashes={1: None, 2: None, 4: None}, # TODO(msimberg):
)
MCH_CH_R04B09: Final = Experiment(
name="exclaim_ch_r04b09_dsl",
description="Regional setup used by EXCLAIM to validate the icon-exclaim.",
grid=Grids.MCH_CH_R04B09_DSL,
num_levels=65,
known_hashes={1: None, 2: None, 4: None}, # TODO(msimberg):
)
JW: Final = Experiment(
name="exclaim_nh35_tri_jws",
description="Jablonowski Williamson atmospheric test case",
grid=Grids.R02B04_GLOBAL,
num_levels=35,
known_hashes={1: None, 2: None, 4: None}, # TODO(msimberg):
)
GAUSS3D: Final = Experiment(
name="exclaim_gauss3d",
description="Gauss 3d test case",
grid=Grids.TORUS_50000x5000,
num_levels=35,
known_hashes={1: None, 2: None, 4: None}, # TODO(msimberg):
)
WEISMAN_KLEMP_TORUS: Final = Experiment(
name="exclaim_nh_weisman_klemp",
description="Weisman-Klemp experiment on Torus Grid",
grid=Grids.TORUS_50000x5000,
num_levels=64,
known_hashes={1: None, 2: None, 4: None}, # TODO(msimberg):
)


Expand Down
4 changes: 3 additions & 1 deletion model/testing/src/icon4py/model/testing/fixtures/datatest.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,9 @@ def _download_ser_data(
archive_path = definitions.SERIALIZED_DATA_DIR + "/" + archive_filename
uri = dt_utils.get_serialized_data_url(root_url, archive_path)
destination_path = dt_utils.get_datapath_for_experiment(_experiment, processor_props)
data_handling.download_test_data(destination_path.parent, uri)
data_handling.download_test_data(
destination_path.parent, uri, known_hash=_experiment.known_hashes[comm_size]
)
except KeyError as err:
raise RuntimeError(
f"No data for communicator of size {comm_size} exists, use 1, 2 or 4"
Expand Down
1 change: 1 addition & 0 deletions model/testing/src/icon4py/model/testing/grid_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def _download_grid_file(grid: definitions.GridDescription) -> pathlib.Path:
data_handling.download_and_extract(
grid.uri,
grid_directory,
known_hash=grid.known_hash,
)
else:
# If grid download is disabled, we check if the file exists
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ build = [
"pip>=22.1.1",
"setuptools>=70.1.1",
"wheel>=0.45.0",
"wget>=3.2"
"pooch>=1.7.0"
]
dev = [
{include-group = "build"},
Expand Down
18 changes: 6 additions & 12 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.