diff --git a/src/nemo_run/__init__.py b/src/nemo_run/__init__.py
index 7fac81ad..dfbc3fc6 100644
--- a/src/nemo_run/__init__.py
+++ b/src/nemo_run/__init__.py
@@ -27,9 +27,7 @@
 from nemo_run.core.execution.local import LocalExecutor
 from nemo_run.core.execution.skypilot import SkypilotExecutor
 from nemo_run.core.execution.slurm import SlurmExecutor
-from nemo_run.core.packaging.base import Packager
-from nemo_run.core.packaging.git import GitArchivePackager
-from nemo_run.core.packaging.pattern import PatternPackager
+from nemo_run.core.packaging import GitArchivePackager, HybridPackager, Packager, PatternPackager
 from nemo_run.core.tunnel.client import LocalTunnel, SSHTunnel
 from nemo_run.devspace.base import DevSpace
 from nemo_run.help import help
@@ -54,6 +52,7 @@
     "ExecutorMacros",
     "Experiment",
     "FaultTolerance",
+    "HybridPackager",
     "GitArchivePackager",
     "PatternPackager",
     "help",
diff --git a/src/nemo_run/core/packaging/__init__.py b/src/nemo_run/core/packaging/__init__.py
index 5e23fd4e..2d935ccc 100644
--- a/src/nemo_run/core/packaging/__init__.py
+++ b/src/nemo_run/core/packaging/__init__.py
@@ -15,6 +15,7 @@
 
 from nemo_run.core.packaging.base import Packager
 from nemo_run.core.packaging.git import GitArchivePackager
+from nemo_run.core.packaging.hybrid import HybridPackager
 from nemo_run.core.packaging.pattern import PatternPackager
 
-__all__ = ["Packager", "GitArchivePackager", "PatternPackager"]
+__all__ = ["Packager", "GitArchivePackager", "PatternPackager", "HybridPackager"]
diff --git a/src/nemo_run/core/packaging/hybrid.py b/src/nemo_run/core/packaging/hybrid.py
new file mode 100644
--- /dev/null
+++ b/src/nemo_run/core/packaging/hybrid.py
@@ -0,0 +1,81 @@
+import os
+import shlex
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Dict
+
+from invoke.context import Context
+
+from nemo_run.core.packaging.base import Packager
+
+
+@dataclass(kw_only=True)
+class HybridPackager(Packager):
+    """
+    A packager that combines multiple other packagers into one final archive.
+    Each subpackager is mapped to a target directory name, which will become
+    the top-level folder under which that packager’s content is placed.
+    """
+
+    sub_packagers: Dict[str, Packager] = field(default_factory=dict)
+
+    def package(self, path: Path, job_dir: str, name: str) -> str:
+        """
+        Build ``<job_dir>/<name>.tar.gz`` out of the archives of all sub-packagers.
+
+        Args:
+            path: Source path, forwarded unchanged to every sub-packager.
+            job_dir: Directory that receives the final archive and scratch files.
+            name: Base name of the final archive; each sub-packager is asked for
+                an archive named ``<name>_<folder_name>``.
+
+        Returns:
+            The path of the combined archive. If that file already exists it is
+            returned as-is and nothing is repackaged.
+        """
+        final_tar_gz = os.path.join(job_dir, f"{name}.tar.gz")
+        if os.path.exists(final_tar_gz):
+            return final_tar_gz
+
+        tmp_tar = final_tar_gz + ".tmp"
+        # Compress into a scratch name first: the existence check above treats any
+        # file at final_tar_gz as a finished archive, so a failed gzip must never
+        # leave a truncated file there.
+        tmp_gz = final_tar_gz + ".partial"
+        ctx = Context()
+        try:
+            # Create an empty tar to append packaged files from each sub-packager
+            ctx.run(f"tar -cf {shlex.quote(tmp_tar)} --files-from /dev/null")
+
+            for folder_name, packager in self.sub_packagers.items():
+                subarchive_path = packager.package(path, job_dir, f"{name}_{folder_name}")
+                self._append_subarchive(ctx, tmp_tar, folder_name, subarchive_path, job_dir)
+
+            # Finally, compress the combined tar and publish it under its real name
+            ctx.run(f"gzip -c {shlex.quote(tmp_tar)} > {shlex.quote(tmp_gz)}")
+            ctx.run(f"mv {shlex.quote(tmp_gz)} {shlex.quote(final_tar_gz)}")
+        finally:
+            # Runs on success and failure alike; warn=True keeps a cleanup problem
+            # from masking the original error.
+            ctx.run(f"rm -f {shlex.quote(tmp_tar)} {shlex.quote(tmp_gz)}", warn=True)
+
+        return final_tar_gz
+
+    def _append_subarchive(
+        self, ctx: Context, tmp_tar: str, folder_name: str, subarchive_path: str, job_dir: str
+    ) -> None:
+        """Append the content of ``subarchive_path`` to ``tmp_tar`` under ``folder_name/``."""
+        tmp_extract_dir = os.path.join(job_dir, f"__extract_{folder_name}")
+        quoted_dir = shlex.quote(tmp_extract_dir)
+        # Start from an empty directory so leftovers of an interrupted run are not packaged
+        ctx.run(f"rm -rf {quoted_dir}")
+        os.makedirs(tmp_extract_dir, exist_ok=True)
+        try:
+            ctx.run(f"tar -xf {shlex.quote(subarchive_path)} -C {quoted_dir}")
+            # Options are placed before the "." operand so they are parsed as options
+            # regardless of whether tar permutes its arguments.
+            transform = shlex.quote(f"--transform=s,^,{folder_name}/,")
+            ctx.run(f"tar -rf {shlex.quote(tmp_tar)} {transform} -C {quoted_dir} .")
+        finally:
+            ctx.run(f"rm -rf {quoted_dir}", warn=True)
+        ctx.run(f"rm {shlex.quote(subarchive_path)}")
diff --git a/test/core/packaging/test_hybrid.py b/test/core/packaging/test_hybrid.py
new file mode 100644
--- /dev/null
+++ b/test/core/packaging/test_hybrid.py
@@ -0,0 +1,99 @@
+import filecmp
+import os
+import subprocess
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from nemo_run.core.packaging.base import Packager
+from nemo_run.core.packaging.hybrid import HybridPackager
+from test.conftest import MockContext
+
+
+@pytest.fixture
+def mock_subpackager_one(tmp_path) -> Packager:
+    """
+    Creates a mocked Packager that packages a single file named file1.txt.
+    """
+    mock_packager = MagicMock(spec=Packager)
+    # Prepare a small file to tar
+    file_path = tmp_path / "file1.txt"
+    file_path.write_text("Content from packager one")
+
+    tar_path = str(tmp_path / "packager_one.tar.gz")
+    subprocess.run(["tar", "-czf", tar_path, "-C", str(tmp_path), "file1.txt"], check=True)
+
+    # Make the package() call return the path to this tar
+    mock_packager.package.return_value = tar_path
+    return mock_packager
+
+
+@pytest.fixture
+def mock_subpackager_two(tmp_path) -> Packager:
+    """
+    Creates a mocked Packager that packages a single file named file2.txt.
+    """
+    mock_packager = MagicMock(spec=Packager)
+    # Prepare a small file to tar
+    file_path = tmp_path / "file2.txt"
+    file_path.write_text("Content from packager two")
+
+    tar_path = str(tmp_path / "packager_two.tar.gz")
+    subprocess.run(["tar", "-czf", tar_path, "-C", str(tmp_path), "file2.txt"], check=True)
+
+    mock_packager.package.return_value = tar_path
+    return mock_packager
+
+
+@patch("nemo_run.core.packaging.hybrid.Context", MockContext)
+def test_hybrid_packager(mock_subpackager_one, mock_subpackager_two, tmp_path):
+    hybrid = HybridPackager(
+        sub_packagers={
+            "1": mock_subpackager_one,
+            "2": mock_subpackager_two,
+        }
+    )
+    with tempfile.TemporaryDirectory() as job_dir:
+        output_tar = hybrid.package(Path(tmp_path), job_dir, "hybrid_test")
+
+        assert os.path.exists(output_tar)
+        # Scratch files must not outlive a successful run
+        assert os.listdir(job_dir) == [os.path.basename(output_tar)]
+
+        # Extract the resulting tar to verify contents
+        extract_dir = os.path.join(job_dir, "hybrid_extracted")
+        os.makedirs(extract_dir, exist_ok=True)
+        subprocess.run(["tar", "-xzf", output_tar, "-C", extract_dir], check=True)
+
+        # Compare subfolder "1" for file1.txt
+        cmp = filecmp.dircmp(
+            os.path.dirname(mock_subpackager_one.package.return_value),
+            os.path.join(extract_dir, "1"),
+        )
+        assert "file1.txt" in cmp.common_files
+        assert not cmp.diff_files
+
+        # Compare subfolder "2" for file2.txt
+        cmp = filecmp.dircmp(
+            os.path.dirname(mock_subpackager_two.package.return_value),
+            os.path.join(extract_dir, "2"),
+        )
+        assert "file2.txt" in cmp.common_files
+        assert not cmp.diff_files
+
+
+@patch("nemo_run.core.packaging.hybrid.Context", MockContext)
+def test_hybrid_packager_paths_with_spaces(mock_subpackager_one, tmp_path):
+    job_dir = tmp_path / "job dir"
+    job_dir.mkdir()
+    hybrid = HybridPackager(sub_packagers={"sub folder": mock_subpackager_one})
+
+    output_tar = hybrid.package(Path(tmp_path), str(job_dir), "hybrid test")
+
+    extract_dir = tmp_path / "extracted"
+    extract_dir.mkdir()
+    subprocess.run(["tar", "-xzf", output_tar, "-C", str(extract_dir)], check=True)
+    extracted_file = extract_dir / "sub folder" / "file1.txt"
+    assert extracted_file.read_text() == "Content from packager one"