Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/nemo_run/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@
from nemo_run.core.execution.local import LocalExecutor
from nemo_run.core.execution.skypilot import SkypilotExecutor
from nemo_run.core.execution.slurm import SlurmExecutor
from nemo_run.core.packaging.base import Packager
from nemo_run.core.packaging.git import GitArchivePackager
from nemo_run.core.packaging.pattern import PatternPackager
from nemo_run.core.packaging import GitArchivePackager, HybridPackager, Packager, PatternPackager
from nemo_run.core.tunnel.client import LocalTunnel, SSHTunnel
from nemo_run.devspace.base import DevSpace
from nemo_run.help import help
Expand All @@ -54,6 +52,7 @@
"ExecutorMacros",
"Experiment",
"FaultTolerance",
"HybridPackager",
"GitArchivePackager",
"PatternPackager",
"help",
Expand Down
3 changes: 2 additions & 1 deletion src/nemo_run/core/packaging/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from nemo_run.core.packaging.base import Packager
from nemo_run.core.packaging.git import GitArchivePackager
from nemo_run.core.packaging.hybrid import HybridPackager
from nemo_run.core.packaging.pattern import PatternPackager

# Names re-exported as the public API of this package.
# NOTE(review): this text is a rendered diff, so the first assignment is
# presumably the pre-change line and the second its replacement (the hunk
# header reports 2 additions and 1 deletion). Read as plain Python, the second
# assignment is the one that takes effect.
__all__ = ["Packager", "GitArchivePackager", "PatternPackager"]
__all__ = ["Packager", "GitArchivePackager", "PatternPackager", "HybridPackager"]
49 changes: 49 additions & 0 deletions src/nemo_run/core/packaging/hybrid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict

from invoke.context import Context

from nemo_run.core.packaging.base import Packager


@dataclass(kw_only=True)
class HybridPackager(Packager):
    """
    A packager that combines multiple other packagers into one final archive.
    Each subpackager is mapped to a target directory name, which will become
    the top-level folder under which that packager’s content is placed.
    """

    # Maps the top-level folder name inside the combined archive to the
    # packager whose output is placed under that folder.
    sub_packagers: Dict[str, Packager] = field(default_factory=dict)

    def package(self, path: Path, job_dir: str, name: str) -> str:
        """
        Build ``<job_dir>/<name>.tar.gz`` from the output of every sub-packager.

        Each sub-packager is invoked with the same ``path`` and ``job_dir`` and
        the name ``<name>_<folder_name>``. Its archive is unpacked into a
        scratch directory, re-added to the combined tar with every member name
        prefixed by ``<folder_name>/``, and then deleted.

        Args:
            path: Source location forwarded unchanged to each sub-packager.
            job_dir: Directory that receives the final archive and all
                intermediate files.
            name: Base name (without extension) of the final archive.

        Returns:
            Path of the combined ``.tar.gz``. If that file already exists it is
            returned as-is and no sub-packager is run.
        """
        # Local import: only this method assembles shell command lines.
        import shlex

        final_tar_gz = os.path.join(job_dir, f"{name}.tar.gz")
        if os.path.exists(final_tar_gz):
            return final_tar_gz

        # Every value interpolated into a command line is shell-quoted so that
        # paths or folder names containing spaces or shell metacharacters are
        # passed through as a single literal argument.
        tmp_tar = final_tar_gz + ".tmp"
        quoted_tmp_tar = shlex.quote(tmp_tar)
        quoted_final = shlex.quote(final_tar_gz)

        ctx = Context()
        # Start from an empty, uncompressed tar so members can be appended
        # with `tar -r` (appending is not possible on a compressed archive).
        ctx.run(f"tar -cf {quoted_tmp_tar} --files-from /dev/null")

        try:
            for folder_name, packager in self.sub_packagers.items():
                subarchive_path = packager.package(path, job_dir, f"{name}_{folder_name}")
                quoted_subarchive = shlex.quote(str(subarchive_path))

                tmp_extract_dir = os.path.join(job_dir, f"__extract_{folder_name}")
                quoted_extract_dir = shlex.quote(tmp_extract_dir)
                os.makedirs(tmp_extract_dir, exist_ok=True)

                try:
                    ctx.run(f"tar -xf {quoted_subarchive} -C {quoted_extract_dir}")
                    # Prefix every member name with "<folder_name>/" while
                    # appending it to the combined archive.
                    transform = shlex.quote(f"s,^,{folder_name}/,")
                    ctx.run(
                        f"tar -rf {quoted_tmp_tar} -C {quoted_extract_dir} . "
                        f"--transform={transform}"
                    )
                finally:
                    # Remove the scratch directory even when a tar step fails.
                    ctx.run(f"rm -rf {quoted_extract_dir}")

                ctx.run(f"rm {quoted_subarchive}")

            try:
                ctx.run(f"gzip -c {quoted_tmp_tar} > {quoted_final}")
            except Exception:
                # A partially written archive would be mistaken for a finished
                # one by the existence check above on the next call.
                ctx.run(f"rm -f {quoted_final}")
                raise
        finally:
            # `-f` so that cleanup never masks the original error.
            ctx.run(f"rm -f {quoted_tmp_tar}")

        return final_tar_gz
80 changes: 80 additions & 0 deletions test/core/packaging/test_hybrid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import filecmp
import os
import subprocess
import tempfile
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest

from nemo_run.core.packaging.base import Packager
from nemo_run.core.packaging.hybrid import HybridPackager
from test.conftest import MockContext


@pytest.fixture
def mock_subpackager_one(tmp_path) -> Packager:
    """
    Build a stand-in Packager whose ``package()`` returns a real gzip'd tar
    containing a single member, ``file1.txt``.
    """
    payload_name = "file1.txt"
    (tmp_path / payload_name).write_text("Content from packager one")

    # Archive the payload relative to tmp_path so the member name is bare.
    archive = str(tmp_path / "packager_one.tar.gz")
    tar_cmd = ["tar", "-czf", archive, "-C", str(tmp_path), payload_name]
    subprocess.run(tar_cmd, check=True)

    # The stub hands back the pre-built archive instead of packaging anything.
    stub = MagicMock(spec=Packager)
    stub.package.return_value = archive
    return stub


@pytest.fixture
def mock_subpackager_two(tmp_path) -> Packager:
    """
    Build a stand-in Packager whose ``package()`` returns a real gzip'd tar
    containing a single member, ``file2.txt``.
    """
    payload_name = "file2.txt"
    (tmp_path / payload_name).write_text("Content from packager two")

    # Archive the payload relative to tmp_path so the member name is bare.
    archive = str(tmp_path / "packager_two.tar.gz")
    tar_cmd = ["tar", "-czf", archive, "-C", str(tmp_path), payload_name]
    subprocess.run(tar_cmd, check=True)

    # The stub hands back the pre-built archive instead of packaging anything.
    stub = MagicMock(spec=Packager)
    stub.package.return_value = archive
    return stub


@patch("nemo_run.core.packaging.hybrid.Context", MockContext)
def test_hybrid_packager(mock_subpackager_one, mock_subpackager_two, tmp_path):
    """
    Package two mocked sub-packagers through HybridPackager and verify that
    each one's file ends up under its own top-level folder in the archive.
    """
    hybrid = HybridPackager(
        sub_packagers={
            "1": mock_subpackager_one,
            "2": mock_subpackager_two,
        }
    )
    with tempfile.TemporaryDirectory() as job_dir:
        output_tar = hybrid.package(Path(tmp_path), job_dir, "hybrid_test")

        assert os.path.exists(output_tar)

        # Extract the resulting tar to verify contents
        extract_dir = os.path.join(job_dir, "hybrid_extracted")
        os.makedirs(extract_dir, exist_ok=True)
        subprocess.run(["tar", "-xzf", output_tar, "-C", extract_dir], check=True)

        # Assert presence and content explicitly: dircmp.diff_files only covers
        # files that exist on BOTH sides, so it is empty (and the checks below
        # pass) even when the extracted folder contains nothing at all.
        expected = {
            "1": ("file1.txt", "Content from packager one"),
            "2": ("file2.txt", "Content from packager two"),
        }
        for folder, (filename, content) in expected.items():
            extracted = Path(extract_dir) / folder / filename
            assert extracted.is_file()
            assert extracted.read_text() == content

        # Compare subfolder "1" for file1.txt
        cmp = filecmp.dircmp(
            os.path.dirname(mock_subpackager_one.package.return_value),
            os.path.join(extract_dir, "1"),
        )
        assert not cmp.diff_files

        # Compare subfolder "2" for file2.txt
        cmp = filecmp.dircmp(
            os.path.dirname(mock_subpackager_two.package.return_value),
            os.path.join(extract_dir, "2"),
        )
        assert not cmp.diff_files
Loading