Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 132 additions & 3 deletions nemo_run/core/packaging/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ class GitArchivePackager(Packager):
#. This extracted tar file becomes the working directory for your job.

.. note::
git archive will only package code committed in the specified ref.
Any uncommitted code will not be packaged.
We are working on adding an option to package uncommitted code but it is not ready yet.
By default, git archive will only package code committed in the specified ref.
You can use include_uncommitted=True and include_untracked=True to package
uncommitted changes and untracked files respectively.
"""

basepath: str = ""
Expand Down Expand Up @@ -72,6 +72,12 @@ class GitArchivePackager(Packager):
check_uncommitted_changes: bool = False
check_untracked_files: bool = False

#: Include uncommitted changes in the archive
include_uncommitted: bool = False

#: Include untracked files in the archive
include_untracked: bool = False

def package(self, path: Path, job_dir: str, name: str) -> str:
output_file = os.path.join(job_dir, f"{name}.tar.gz")
if os.path.exists(output_file):
Expand Down Expand Up @@ -132,6 +138,129 @@ def package(self, path: Path, job_dir: str, name: str) -> str:
ctx.run(git_archive_cmd)
if self.include_submodules:
ctx.run(git_submodule_cmd)

# Handle uncommitted changes
if self.include_uncommitted:
unstaged_file_id = uuid.uuid4()
unstaged_tar_file = f"unstaged_{unstaged_file_id}.tmp"

with ctx.cd(git_base_path):
# Get the list of files with unstaged changes
changed_files = (
subprocess.run(
"git diff --name-only", shell=True, capture_output=True, text=True
)
.stdout.strip()
.split("\n")
)

if changed_files and changed_files[0]: # Check if non-empty
# Filter files in subpath if specified
if self.subpath:
filtered_files = []
for f in changed_files:
if f.startswith(self.subpath):
filtered_files.append(f)
changed_files = filtered_files

if changed_files:
# Create the tarfile in the right way based on subpath
if self.subpath:
# We need to change to the subpath dir and create the tar without the prefix
subpath_dir = os.path.join(git_base_path, self.subpath)

# Create list of files relative to the subpath
relative_files = []
for f in changed_files:
relative_files.append(
os.path.relpath(os.path.join(git_base_path, f), subpath_dir)
)

# Create tar from the subpath directory
tar_path = os.path.join(git_base_path, unstaged_tar_file)
with ctx.cd(subpath_dir):
if relative_files: # Only create tar if there are files
ctx.run(f"tar -cf {tar_path} {' '.join(relative_files)}")
else:
# Create empty tar file to avoid errors
ctx.run(f"tar -cf {tar_path} --files-from /dev/null")
else:
# No subpath, create tar from the git base path
if changed_files: # Only create tar if there are files
ctx.run(f"tar -cf {unstaged_tar_file} {' '.join(changed_files)}")
else:
# Create empty tar file to avoid errors
ctx.run(f"tar -cf {unstaged_tar_file} --files-from /dev/null")

Check warning

Code scanning / CodeQL

Unreachable code Warning

This statement is unreachable.

# Add to the main archive - use a more compatible approach
# Instead of using 'tar Af', use the extract-and-create approach for all platforms
temp_dir = f"temp_extract_unstaged_{unstaged_file_id}"
ctx.run(f"mkdir -p {temp_dir}")
ctx.run(f"tar xf {output_file}.tmp -C {temp_dir}")
ctx.run(f"tar xf {unstaged_tar_file} -C {temp_dir}")
ctx.run(f"tar cf {output_file}.tmp -C {temp_dir} .")
ctx.run(f"rm -rf {temp_dir}")
ctx.run(f"rm {unstaged_tar_file}")

# Handle untracked files
if self.include_untracked:
untracked_file_id = uuid.uuid4()
untracked_tar_file = f"untracked_{untracked_file_id}.tmp"

with ctx.cd(git_base_path):
# Get the list of untracked files
untracked_files = (
subprocess.run(
"git ls-files --others --exclude-standard",
shell=True,
capture_output=True,
text=True,
)
.stdout.strip()
.split("\n")
)

if untracked_files and untracked_files[0]: # Check if non-empty
# Filter files in subpath if specified
if self.subpath:
filtered_files = []
for f in untracked_files:
if f.startswith(self.subpath):
filtered_files.append(f)
untracked_files = filtered_files

if untracked_files:
# Create the tarfile in the right way based on subpath
if self.subpath:
# We need to change to the subpath dir and create the tar without the prefix
subpath_dir = os.path.join(git_base_path, self.subpath)

# Create list of files relative to the subpath
relative_files = []
for f in untracked_files:
relative_files.append(
os.path.relpath(os.path.join(git_base_path, f), subpath_dir)
)

# Create tar from the subpath directory
tar_path = os.path.join(git_base_path, untracked_tar_file)
with ctx.cd(subpath_dir):
ctx.run(f"tar -cf {tar_path} {' '.join(relative_files)}")
else:
# No subpath, create tar from the git base path
ctx.run(f"tar -cf {untracked_tar_file} {' '.join(untracked_files)}")

# Add to the main archive - use a more compatible approach
# Instead of using 'tar Af', use the extract-and-create approach for all platforms
temp_dir = f"temp_extract_untracked_{untracked_file_id}"
ctx.run(f"mkdir -p {temp_dir}")
ctx.run(f"tar xf {output_file}.tmp -C {temp_dir}")
ctx.run(f"tar xf {untracked_tar_file} -C {temp_dir}")
ctx.run(f"tar cf {output_file}.tmp -C {temp_dir} .")
ctx.run(f"rm -rf {temp_dir}")
ctx.run(f"rm {untracked_tar_file}")

# Process include_pattern files
if isinstance(self.include_pattern, str):
self.include_pattern = [self.include_pattern]

Expand Down
137 changes: 136 additions & 1 deletion test/core/packaging/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,8 @@ def test_package_with_multi_include_pattern_rel_path(packager, temp_repo, tmpdir
@patch("nemo_run.core.packaging.git.Context", MockContext)
def test_package_with_check_uncommitted_changes(packager, temp_repo):
temp_repo = Path(temp_repo)
open(temp_repo / "file1.txt", "w").write("Hello World")
with open(temp_repo / "file1.txt", "w") as f:
f.write("Modified content")

packager = GitArchivePackager(ref="HEAD", check_uncommitted_changes=True)
with pytest.raises(RuntimeError, match="Your repo has uncommitted changes"):
Expand Down Expand Up @@ -418,3 +419,137 @@ def test_package_without_include_submodules(packager, temp_repo):
),
)
assert len(os.listdir(os.path.join(job_dir, "extracted_output", "submodule"))) == 0


@patch("nemo_run.core.packaging.git.Context", MockContext)
def test_package_with_uncommitted_changes(packager, temp_repo):
    """Package with ``include_uncommitted=True`` and check the edited file wins.

    ``file1.txt`` in the fixture repo is overwritten without committing
    (presumably it is already tracked by the ``temp_repo`` fixture -- confirm
    there), the repo is packaged at ``HEAD``, and the resulting archive is
    unpacked to confirm it carries the modified content.
    """
    repo_root = Path(temp_repo)
    (repo_root / "file1.txt").write_text("Modified content")

    archiver = GitArchivePackager(ref="HEAD", include_uncommitted=True)
    with tempfile.TemporaryDirectory() as job_dir:
        output_file = archiver.package(Path(repo_root), job_dir, "test_package")
        assert os.path.exists(output_file)

        # Unpack the produced archive next to it so its contents can be inspected.
        extract_dir = os.path.join(job_dir, "extracted_output")
        mkdir_cmd = shlex.split(f"mkdir -p {extract_dir}")
        untar_cmd = shlex.split(f"tar -xvzf {output_file} -C {extract_dir} --ignore-zeros")
        subprocess.check_call(mkdir_cmd)
        subprocess.check_call(untar_cmd)

        # The working-tree version of the file must be the one in the archive.
        packaged_text = Path(extract_dir, "file1.txt").read_text()
        assert packaged_text == "Modified content"


@patch("nemo_run.core.packaging.git.Context", MockContext)
def test_package_with_untracked_files(packager, temp_repo):
    """Package with ``include_untracked=True`` and check the new file is present.

    A file that was never added to git is created in the repo, the repo is
    packaged at ``HEAD``, and the archive is unpacked to confirm the untracked
    file is there with its content intact.
    """
    repo_root = Path(temp_repo)
    # Create a file git does not know about.
    (repo_root / "untracked.txt").write_text("Untracked content")

    archiver = GitArchivePackager(ref="HEAD", include_untracked=True)
    with tempfile.TemporaryDirectory() as job_dir:
        output_file = archiver.package(Path(repo_root), job_dir, "test_package")
        assert os.path.exists(output_file)

        # Unpack the produced archive next to it so its contents can be inspected.
        extract_dir = os.path.join(job_dir, "extracted_output")
        mkdir_cmd = shlex.split(f"mkdir -p {extract_dir}")
        untar_cmd = shlex.split(f"tar -xvzf {output_file} -C {extract_dir} --ignore-zeros")
        subprocess.check_call(mkdir_cmd)
        subprocess.check_call(untar_cmd)

        # The untracked file must exist in the archive with the written content.
        untracked_copy = os.path.join(extract_dir, "untracked.txt")
        assert os.path.exists(untracked_copy)
        packaged_text = Path(untracked_copy).read_text()
        assert packaged_text == "Untracked content"


@patch("nemo_run.core.packaging.git.Context", MockContext)
def test_package_with_uncommitted_and_untracked(packager, temp_repo):
    """Package with both ``include_uncommitted`` and ``include_untracked`` set.

    One existing file is modified without committing and one brand-new file is
    created without adding it to git; after packaging at ``HEAD`` the unpacked
    archive must contain both, each with its working-tree content.
    """
    repo_root = Path(temp_repo)
    (repo_root / "file1.txt").write_text("Modified content")
    # Create a file git does not know about.
    (repo_root / "untracked.txt").write_text("Untracked content")

    archiver = GitArchivePackager(ref="HEAD", include_uncommitted=True, include_untracked=True)
    with tempfile.TemporaryDirectory() as job_dir:
        output_file = archiver.package(Path(repo_root), job_dir, "test_package")
        assert os.path.exists(output_file)

        # Unpack the produced archive next to it so its contents can be inspected.
        extract_dir = os.path.join(job_dir, "extracted_output")
        mkdir_cmd = shlex.split(f"mkdir -p {extract_dir}")
        untar_cmd = shlex.split(f"tar -xvzf {output_file} -C {extract_dir} --ignore-zeros")
        subprocess.check_call(mkdir_cmd)
        subprocess.check_call(untar_cmd)

        # The modified file must carry the uncommitted content.
        modified_text = Path(extract_dir, "file1.txt").read_text()
        assert modified_text == "Modified content"

        # The untracked file must exist and carry the written content.
        untracked_copy = os.path.join(extract_dir, "untracked.txt")
        assert os.path.exists(untracked_copy)
        untracked_text = Path(untracked_copy).read_text()
        assert untracked_text == "Untracked content"


@patch("nemo_run.core.packaging.git.Context", MockContext)
def test_package_subpath_with_uncommitted_and_untracked(packager, temp_repo):
    """Package only ``subdir`` while including uncommitted and untracked files.

    Setup: a file inside ``subdir`` is committed and then modified again, an
    untracked file is added inside ``subdir``, and another untracked file is
    added at the repo root. With ``subpath="subdir"`` the unpacked archive must
    hold the two ``subdir`` files at its top level (no ``subdir/`` prefix) and
    must not contain the file from outside the subpath.
    """
    repo_root = Path(temp_repo)
    sub_root = repo_root / "subdir"
    sub_root.mkdir()

    # Commit an initial version of a file inside the subdirectory.
    (sub_root / "committed.txt").write_text("Committed content")
    git_cwd = str(repo_root)
    subprocess.check_call(["git", "add", "."], cwd=git_cwd)
    subprocess.check_call(["git", "commit", "-m", "Add subdir"], cwd=git_cwd)

    # Leave an uncommitted modification on that committed file.
    (sub_root / "committed.txt").write_text("Modified committed content")

    # Untracked file inside the subpath: expected in the archive.
    (sub_root / "untracked.txt").write_text("Untracked content in subdir")

    # Untracked file outside the subpath: must be left out of the archive.
    (repo_root / "outside.txt").write_text("Outside content")

    archiver = GitArchivePackager(
        ref="HEAD",
        subpath="subdir",
        include_uncommitted=True,
        include_untracked=True,
    )

    with tempfile.TemporaryDirectory() as job_dir:
        output_file = archiver.package(Path(repo_root), job_dir, "test_package")
        assert os.path.exists(output_file)

        # Unpack the produced archive next to it so its contents can be inspected.
        extract_dir = os.path.join(job_dir, "extracted_output")
        mkdir_cmd = shlex.split(f"mkdir -p {extract_dir}")
        untar_cmd = shlex.split(f"tar -xvzf {output_file} -C {extract_dir} --ignore-zeros")
        subprocess.check_call(mkdir_cmd)
        subprocess.check_call(untar_cmd)

        # The modified file from subdir sits at the archive root with new content.
        modified_text = Path(extract_dir, "committed.txt").read_text()
        assert modified_text == "Modified committed content"

        # The untracked file from subdir is present with the written content.
        untracked_copy = os.path.join(extract_dir, "untracked.txt")
        assert os.path.exists(untracked_copy)
        untracked_text = Path(untracked_copy).read_text()
        assert untracked_text == "Untracked content in subdir"

        # Nothing from outside the subpath may leak into the archive.
        assert not os.path.exists(os.path.join(extract_dir, "outside.txt"))