Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions src/nemo_run/core/packaging/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,9 +154,7 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
f"find {relative_include_pattern} -type f | "
f"tar -cf {os.path.join(git_base_path, pattern_tar_file_name)} -T -"
)
tar_concatenate_cmd = (
f"cat {pattern_tar_file_name} >> {output_file}.tmp && rm {pattern_tar_file_name}"
)
tar_concatenate_cmd = f"cat {os.path.join(git_base_path, pattern_tar_file_name)} >> {output_file}.tmp && rm {pattern_tar_file_name}"

with ctx.cd(include_pattern_relative_path):
ctx.run(include_pattern_cmd)
Expand Down
26 changes: 23 additions & 3 deletions src/nemo_run/core/packaging/hybrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,14 @@ class HybridPackager(Packager):
"""
A packager that combines multiple other packagers into one final archive.
Each subpackager is mapped to a target directory name, which will become
the top-level folder under which that packager’s content is placed.
the top-level folder under which that packager's content is placed.

If `extract_at_root` is True, the contents of each sub-packager are extracted
directly at the root of the final archive (i.e. without being nested in a subfolder).
"""

sub_packagers: Dict[str, Packager] = field(default_factory=dict)
extract_at_root: bool = False

def package(self, path: Path, job_dir: str, name: str) -> str:
final_tar_gz = os.path.join(job_dir, f"{name}.tar.gz")
Expand All @@ -28,7 +32,8 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
ctx = Context()
ctx.run(f"tar -cf {tmp_tar} --files-from /dev/null")

# For each subpackager, run its .package() method and extract to a subfolder
# For each subpackager, run its .package() method,
# extract the content and add it to the final tar
for folder_name, packager in self.sub_packagers.items():
subarchive_path = packager.package(path, job_dir, f"{name}_{folder_name}")

Expand All @@ -38,7 +43,22 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
os.makedirs(tmp_extract_dir, exist_ok=True)

ctx.run(f"tar -xf {subarchive_path} -C {tmp_extract_dir}")
ctx.run(f"tar -rf {tmp_tar} -C {tmp_extract_dir} . --transform='s,^,{folder_name}/,'")

# If extract_at_root is True then add files directly to the archive root.
# Otherwise, add them under a subfolder named after the key.
if self.extract_at_root:
ctx.run(f"tar -rf {tmp_tar} -C {tmp_extract_dir} .")
else:
sysname = os.uname().sysname
if sysname == "Darwin":
# BSD tar uses the -s option with a chosen delimiter (here we use a comma)
# The first -s replaces an entry that is exactly "."
# The second -s replaces entries starting with "./" (i.e. files inside)
transform_option = f"-s ',^\\.$,{folder_name},' -s ',^\\./,{folder_name}/,'"
else:
transform_option = f"--transform='s,^,{folder_name}/,'"
ctx.run(f"tar {transform_option} -rf {tmp_tar} -C {tmp_extract_dir} .")

ctx.run(f"rm -rf {tmp_extract_dir}")
ctx.run(f"rm {subarchive_path}")

Expand Down
32 changes: 32 additions & 0 deletions test/core/packaging/test_hybrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,35 @@ def test_hybrid_packager(mock_subpackager_one, mock_subpackager_two, tmp_path):
os.path.join(extract_dir, "2"),
)
assert not cmp.diff_files


@patch("nemo_run.core.packaging.hybrid.Context", MockContext)
def test_hybrid_packager_extract_at_root(mock_subpackager_one, mock_subpackager_two, tmp_path):
    """Check that ``extract_at_root=True`` places sub-packager files at the archive root.

    Two mock sub-packagers are combined into one archive, the archive is
    unpacked with ``tar``, and the test asserts that ``file1.txt`` and
    ``file2.txt`` sit directly in the extraction directory (not under the
    ``"1"`` / ``"2"`` keys) with the expected contents.
    """
    packager = HybridPackager(
        sub_packagers={"1": mock_subpackager_one, "2": mock_subpackager_two},
        extract_at_root=True,
    )

    # Expected file name at the archive root -> expected file content.
    expected_contents = {
        "file1.txt": "Content from packager one",
        "file2.txt": "Content from packager two",
    }

    with tempfile.TemporaryDirectory() as job_dir:
        archive = packager.package(Path(tmp_path), job_dir, "hybrid_test_extract")
        assert os.path.exists(archive)

        # Unpack the produced archive into a scratch directory inside job_dir.
        unpack_dir = os.path.join(job_dir, "hybrid_extracted")
        os.makedirs(unpack_dir, exist_ok=True)
        subprocess.run(["tar", "-xzf", archive, "-C", unpack_dir], check=True)

        root_paths = [os.path.join(unpack_dir, file_name) for file_name in expected_contents]

        # Every file must exist at the root before any content is read.
        for root_path in root_paths:
            assert os.path.exists(root_path), f"Expected {root_path} to exist, but it does not."

        # Read all files first, then compare, so the order of checks stays
        # existence -> read -> content.
        actual_contents = []
        for root_path in root_paths:
            with open(root_path, "r") as handle:
                actual_contents.append(handle.read())

        for root_path, actual, expected in zip(
            root_paths, actual_contents, expected_contents.values()
        ):
            assert actual == expected, f"Unexpected content in {root_path}: {actual}"
Loading