From e55b448c4d482791774c63e7b040ecc5e7b464a5 Mon Sep 17 00:00:00 2001 From: ansjindal Date: Mon, 4 Aug 2025 10:58:55 +0200 Subject: [PATCH 1/4] update lepton executor to include custom prelaunch commands section Signed-off-by: ansjindal --- nemo_run/core/execution/lepton.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/nemo_run/core/execution/lepton.py b/nemo_run/core/execution/lepton.py index f3cd2c92..e3aec777 100644 --- a/nemo_run/core/execution/lepton.py +++ b/nemo_run/core/execution/lepton.py @@ -54,6 +54,7 @@ class LeptonExecutor(Executor): mounts: list[dict[str, Any]] = field(default_factory=list) lepton_job_dir: str = field(init=False, default="") custom_spec: dict[str, Any] = field(default_factory=dict) + pre_launch_commands: list[str] = field(default_factory=list) # Custom commands before launch def stop_job(self, job_id: str): """ @@ -244,8 +245,14 @@ def launch(self, name: str, cmd: list[str]) -> tuple[str, str]: if len(name) > 35: logger.warning("length of name exceeds 35 characters. Shortening...") name = name[:34] + + # Build pre-launch commands section + pre_launch_section = "" + if self.pre_launch_commands: + pre_launch_section = "\n".join(self.pre_launch_commands) + "\n" + launch_script = f""" -wget -O init.sh https://raw.githubusercontent.com/leptonai/scripts/main/lepton_env_to_pytorch.sh +{pre_launch_section}wget -O init.sh https://raw.githubusercontent.com/leptonai/scripts/main/lepton_env_to_pytorch.sh chmod +x init.sh source init.sh ln -s {self.lepton_job_dir}/ /nemo_run From 5ba91f7522234309fe988dbb870d0a2ead4edacf Mon Sep 17 00:00:00 2001 From: ansjindal Date: Tue, 5 Aug 2025 02:12:13 +0200 Subject: [PATCH 2/4] add test for prelaunch section Signed-off-by: ansjindal --- test/core/execution/test_lepton.py | 51 ++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/test/core/execution/test_lepton.py b/test/core/execution/test_lepton.py index 821c8d91..02042215 100644 --- a/test/core/execution/test_lepton.py +++ b/test/core/execution/test_lepton.py @@ -641,3 +641,54 @@ def test_macro_values(self): result = executor.macro_values() assert result is None + + def test_pre_launch_commands_initialization(self): + """Test that pre_launch_commands can be initialized and defaults to empty list.""" + # Test default initialization + executor = LeptonExecutor( + container_image="test-image", + nemo_run_dir="/test/path", + ) + assert executor.pre_launch_commands == [] + + # Test initialization with commands + commands = ["echo 'Setting up environment'", "export TEST_VAR=value"] + executor_with_commands = LeptonExecutor( + container_image="test-image", + nemo_run_dir="/test/path", + pre_launch_commands=commands, + ) + assert executor_with_commands.pre_launch_commands == commands + + def test_launch_script_with_pre_launch_commands(self): + """Test that pre_launch_commands are correctly included in the launch script.""" + + # Test without pre_launch_commands + executor = LeptonExecutor( + container_image="test-image", + nemo_run_dir="/test/path", + ) + + # Test script section generation - empty case + pre_launch_section = "" + if executor.pre_launch_commands: + pre_launch_section = "\n".join(executor.pre_launch_commands) + "\n" + assert pre_launch_section == "" + + # Test with pre_launch_commands + commands = ["echo 'Custom setup'", "export MY_VAR=test"] + executor_with_commands = LeptonExecutor( + container_image="test-image", + nemo_run_dir="/test/path", + pre_launch_commands=commands, + ) + + # Test script section generation - with commands + pre_launch_section_with_commands = "" + if executor_with_commands.pre_launch_commands: + pre_launch_section_with_commands = ( + "\n".join(executor_with_commands.pre_launch_commands) + "\n" + ) + + expected_pre_launch = "echo 'Custom setup'\nexport MY_VAR=test\n" + assert pre_launch_section_with_commands == expected_pre_launch From 9674220633c0ed9f39d178dc85ca7429ae3c5ab0 Mon Sep 17 00:00:00 2001 From: ansjindal Date: Tue, 5 Aug 2025 19:52:53 +0200 Subject: [PATCH 3/4] add more tests for checking the pre-launch-commands section Signed-off-by: ansjindal --- test/core/execution/test_lepton.py | 372 +++++++++++++++++++++++++++++ 1 file changed, 372 insertions(+) diff --git a/test/core/execution/test_lepton.py b/test/core/execution/test_lepton.py index 02042215..1aa5fa35 100644 --- a/test/core/execution/test_lepton.py +++ b/test/core/execution/test_lepton.py @@ -692,3 +692,375 @@ def test_launch_script_with_pre_launch_commands(self): expected_pre_launch = "echo 'Custom setup'\nexport MY_VAR=test\n" assert pre_launch_section_with_commands == expected_pre_launch + + @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.status") + @patch("builtins.open", new_callable=mock_open) + @patch("os.path.join") + def test_launch_method_name_validation( + self, + mock_join, + mock_file, + mock_status, + mock_create_job, + mock_move_data, + mock_validate_mounts, + ): + """Test the launch method's name validation and transformation logic.""" + # Setup mocks + mock_job = MagicMock() + mock_job.metadata.id_ = "test-job-id" + mock_create_job.return_value = mock_job + mock_status.return_value = LeptonJobState.Running + mock_join.return_value = "/fake/path/launch_script.sh" + + executor = LeptonExecutor( + container_image="test-image", + nemo_run_dir="/test/path", + ) + executor.job_dir = "/fake/job/dir" + executor.lepton_job_dir = "/fake/lepton/job/dir" + + # Test normal name (no transformation needed) + job_id, status = executor.launch("short-name", ["python", "script.py"]) + assert job_id == "test-job-id" + assert status == LeptonJobState.Running + + # Test name with underscores and dots (should be replaced) + job_id, status = executor.launch("test_job.name", ["python", "script.py"]) + assert job_id == "test-job-id" + + # Test uppercase name (should be lowercased) + job_id, status = executor.launch("UPPERCASE", ["python", "script.py"]) + assert job_id == "test-job-id" + + @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.status") + @patch("builtins.open", new_callable=mock_open) + @patch("os.path.join") + @patch("nemo_run.core.execution.lepton.logger") + def test_launch_method_long_name_truncation( + self, + mock_logger, + mock_join, + mock_file, + mock_status, + mock_create_job, + mock_move_data, + mock_validate_mounts, + ): + """Test that long names are properly truncated and warning is logged.""" + # Setup mocks + mock_job = MagicMock() + mock_job.metadata.id_ = "test-job-id" + mock_create_job.return_value = mock_job + mock_status.return_value = LeptonJobState.Running + mock_join.return_value = "/fake/path/launch_script.sh" + + executor = LeptonExecutor( + container_image="test-image", + nemo_run_dir="/test/path", + ) + executor.job_dir = "/fake/job/dir" + executor.lepton_job_dir = "/fake/lepton/job/dir" + + # Test long name (should be truncated and logged) + long_name = "this-is-a-very-long-name-that-exceeds-thirty-five-characters" + job_id, status = executor.launch(long_name, ["python", "script.py"]) + + # Verify warning was logged + mock_logger.warning.assert_called_with( + "length of name exceeds 35 characters. Shortening..." + ) + assert job_id == "test-job-id" + assert status == LeptonJobState.Running + + @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.status") + @patch("builtins.open", new_callable=mock_open) + @patch("os.path.join") + def test_launch_method_with_pre_launch_commands( + self, + mock_join, + mock_file, + mock_status, + mock_create_job, + mock_move_data, + mock_validate_mounts, + ): + """Test the launch method properly includes pre_launch_commands in the script.""" + # Setup mocks + mock_job = MagicMock() + mock_job.metadata.id_ = "test-job-id" + mock_create_job.return_value = mock_job + mock_status.return_value = LeptonJobState.Running + mock_join.return_value = "/fake/path/launch_script.sh" + + # Test with pre_launch_commands + commands = ["echo 'Setting up environment'", "export TEST_VAR=value", "mkdir -p /workspace"] + executor = LeptonExecutor( + container_image="test-image", + nemo_run_dir="/test/path", + pre_launch_commands=commands, + ) + executor.job_dir = "/fake/job/dir" + executor.lepton_job_dir = "/fake/lepton/job/dir" + + job_id, status = executor.launch("test-job", ["python", "train.py"]) + + # Verify launch script was written with pre_launch_commands + mock_file.assert_called_once_with("/fake/path/launch_script.sh", "w+") + handle = mock_file.return_value.__enter__.return_value + written_content = handle.write.call_args[0][0] + + # Verify pre_launch_commands are included at the beginning + assert "echo 'Setting up environment'" in written_content + assert "export TEST_VAR=value" in written_content + assert "mkdir -p /workspace" in written_content + + # Verify the script structure + assert "wget -O init.sh" in written_content + assert "chmod +x init.sh" in written_content + assert "source init.sh" in written_content + assert "ln -s /fake/lepton/job/dir/ /nemo_run" in written_content + assert "cd /nemo_run/code" in written_content + assert "python train.py" in written_content + + assert job_id == "test-job-id" + assert status == LeptonJobState.Running + + @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.status") + @patch("builtins.open", new_callable=mock_open) + @patch("os.path.join") + def test_launch_method_without_pre_launch_commands( + self, + mock_join, + mock_file, + mock_status, + mock_create_job, + mock_move_data, + mock_validate_mounts, + ): + """Test the launch method works correctly without pre_launch_commands.""" + # Setup mocks + mock_job = MagicMock() + mock_job.metadata.id_ = "test-job-id" + mock_create_job.return_value = mock_job + mock_status.return_value = LeptonJobState.Running + mock_join.return_value = "/fake/path/launch_script.sh" + + # Test without pre_launch_commands (default empty list) + executor = LeptonExecutor( + container_image="test-image", + nemo_run_dir="/test/path", + ) + executor.job_dir = "/fake/job/dir" + executor.lepton_job_dir = "/fake/lepton/job/dir" + + job_id, status = executor.launch("test-job", ["python", "script.py"]) + + # Verify launch script was written without pre_launch_commands + mock_file.assert_called_once_with("/fake/path/launch_script.sh", "w+") + handle = mock_file.return_value.__enter__.return_value + written_content = handle.write.call_args[0][0] + + # Verify no pre_launch_commands section + lines = written_content.split("\n") + # First non-empty line should be the wget command + first_command_line = next(line for line in lines if line.strip()) + assert first_command_line.strip().startswith("wget -O init.sh") + + # Verify the standard script structure is still there + assert "wget -O init.sh" in written_content + assert "chmod +x init.sh" in written_content + assert "source init.sh" in written_content + assert "ln -s /fake/lepton/job/dir/ /nemo_run" in written_content + assert "cd /nemo_run/code" in written_content + assert "python script.py" in written_content + + assert job_id == "test-job-id" + assert status == LeptonJobState.Running + + @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.status") + @patch("builtins.open", new_callable=mock_open) + @patch("os.path.join") + def test_launch_method_edge_cases( + self, + mock_join, + mock_file, + mock_status, + mock_create_job, + mock_move_data, + mock_validate_mounts, + ): + """Test edge cases for the launch method.""" + # Setup mocks + mock_job = MagicMock() + mock_job.metadata.id_ = "test-job-id" + mock_create_job.return_value = mock_job + mock_status.return_value = LeptonJobState.Running + mock_join.return_value = "/fake/path/launch_script.sh" + + executor = LeptonExecutor( + container_image="test-image", + nemo_run_dir="/test/path", + ) + executor.job_dir = "/fake/job/dir" + executor.lepton_job_dir = "/fake/lepton/job/dir" + + # Test with exactly 35 characters (should be truncated) + name_35_chars = "a" * 35 + job_id, status = executor.launch(name_35_chars, ["python", "script.py"]) + assert job_id == "test-job-id" + + # Test with empty command list + job_id, status = executor.launch("test-job", []) + assert job_id == "test-job-id" + + # Test with complex command + complex_cmd = [ + "python", + "-m", + "torch.distributed.launch", + "--nproc_per_node=8", + "train.py", + "--config", + "config.yaml", + ] + job_id, status = executor.launch("complex-job", complex_cmd) + assert job_id == "test-job-id" + + # Verify complex command is properly joined in script + handle = mock_file.return_value.__enter__.return_value + written_content = handle.write.call_args[0][0] + expected_cmd = " ".join(complex_cmd) + assert expected_cmd in written_content + + @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.status") + @patch("builtins.open", new_callable=mock_open) + @patch("os.path.join") + @patch("nemo_run.core.execution.lepton.logger") + def test_launch_error_paths( + self, + mock_logger, + mock_join, + mock_file, + mock_status, + mock_create_job, + mock_move_data, + mock_validate_mounts, + ): + """Test launch method error handling and logging.""" + executor = LeptonExecutor(container_image="test-image", nemo_run_dir="/test/path") + executor.job_dir = executor.lepton_job_dir = "/fake/dir" + mock_join.return_value = "/fake/launch_script.sh" + + # Test job creation failure + mock_create_job.return_value = None + with pytest.raises(RuntimeError, match="Failed to create Lepton job"): + executor.launch("test", ["cmd"]) + mock_logger.info.assert_any_call("Creating distributed workload") + + # Test missing job ID + mock_job = MagicMock() + mock_job.metadata.id_ = None + mock_create_job.return_value = mock_job + with pytest.raises(RuntimeError, match="Failed to retrieve job information"): + executor.launch("test", ["cmd"]) + + # Test status failure + mock_job.metadata.id_ = "job-id" + mock_status.return_value = None + with pytest.raises(RuntimeError, match="Failed to retrieve job status"): + executor.launch("test", ["cmd"]) + + # Test success path with logging + mock_status.return_value = LeptonJobState.Running + job_id, status = executor.launch("test", ["cmd"]) + assert job_id == "job-id" + mock_logger.info.assert_any_call("Copying experiment directory to remote filesystem") + + @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.status") + @patch("builtins.open", new_callable=mock_open) + @patch("os.path.join") + @patch("nemo_run.core.execution.lepton.logger") + def test_launch_long_name_truncation( + self, + mock_logger, + mock_join, + mock_file, + mock_status, + mock_create_job, + mock_move_data, + mock_validate_mounts, + ): + """Test name truncation warning and logic (lines 246-247).""" + executor = LeptonExecutor(container_image="test-image", nemo_run_dir="/test/path") + executor.job_dir = executor.lepton_job_dir = "/fake/dir" + mock_join.return_value = "/fake/launch_script.sh" + + mock_job = MagicMock() + mock_job.metadata.id_ = "job-id" + mock_create_job.return_value = mock_job + mock_status.return_value = LeptonJobState.Running + + # Test long name triggers warning and truncation + long_name = "a" * 50 # 50 characters, exceeds 35 + executor.launch(long_name, ["cmd"]) + mock_logger.warning.assert_called_with( + "length of name exceeds 35 characters. Shortening..." + ) + + @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") + @patch("nemo_run.core.execution.lepton.LeptonExecutor.status") + @patch("builtins.open", new_callable=mock_open) + @patch("os.path.join") + def test_launch_prelaunch_commands_join( + self, + mock_join, + mock_file, + mock_status, + mock_create_job, + mock_move_data, + mock_validate_mounts, + ): + """Test pre_launch_commands joining logic (line 252).""" + executor = LeptonExecutor( + container_image="test-image", + nemo_run_dir="/test/path", + pre_launch_commands=["echo setup", "export VAR=1"], + ) + executor.job_dir = executor.lepton_job_dir = "/fake/dir" + mock_join.return_value = "/fake/launch_script.sh" + + mock_job = MagicMock() + mock_job.metadata.id_ = "job-id" + mock_create_job.return_value = mock_job + mock_status.return_value = LeptonJobState.Running + + executor.launch("test", ["cmd"]) + + # Verify script contains joined pre_launch_commands + handle = mock_file.return_value.__enter__.return_value + written_content = handle.write.call_args[0][0] + assert "echo setup\nexport VAR=1\n" in written_content From 9187e24daf443970413a41de956e995f66916f20 Mon Sep 17 00:00:00 2001 From: ansjindal Date: Tue, 5 Aug 2025 19:59:27 +0200 Subject: [PATCH 4/4] update lepton executor tests Signed-off-by: ansjindal --- test/core/execution/test_lepton.py | 243 ++--------------------------- 1 file changed, 17 insertions(+), 226 deletions(-) diff --git a/test/core/execution/test_lepton.py b/test/core/execution/test_lepton.py index 1aa5fa35..0ce503f0 100644 --- a/test/core/execution/test_lepton.py +++ b/test/core/execution/test_lepton.py @@ -693,49 +693,6 @@ def test_launch_script_with_pre_launch_commands(self): expected_pre_launch = "echo 'Custom setup'\nexport MY_VAR=test\n" assert pre_launch_section_with_commands == expected_pre_launch - @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") - @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") - @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") - @patch("nemo_run.core.execution.lepton.LeptonExecutor.status") - @patch("builtins.open", new_callable=mock_open) - @patch("os.path.join") - def test_launch_method_name_validation( - self, - mock_join, - mock_file, - mock_status, - mock_create_job, - mock_move_data, - mock_validate_mounts, - ): - """Test the launch method's name validation and transformation logic.""" - # Setup mocks - mock_job = MagicMock() - mock_job.metadata.id_ = "test-job-id" - mock_create_job.return_value = mock_job - mock_status.return_value = LeptonJobState.Running - mock_join.return_value = "/fake/path/launch_script.sh" - - executor = LeptonExecutor( - container_image="test-image", - nemo_run_dir="/test/path", - ) - executor.job_dir = "/fake/job/dir" - executor.lepton_job_dir = "/fake/lepton/job/dir" - - # Test normal name (no transformation needed) - job_id, status = executor.launch("short-name", ["python", "script.py"]) - assert job_id == "test-job-id" - assert status == LeptonJobState.Running - - # Test name with underscores and dots (should be replaced) - job_id, status = executor.launch("test_job.name", ["python", "script.py"]) - assert job_id == "test-job-id" - - # Test uppercase name (should be lowercased) - job_id, status = executor.launch("UPPERCASE", ["python", "script.py"]) - assert job_id == "test-job-id" - @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") @@ -743,7 +700,7 @@ def test_launch_method_name_validation( @patch("builtins.open", new_callable=mock_open) @patch("os.path.join") @patch("nemo_run.core.execution.lepton.logger") - def test_launch_method_long_name_truncation( + def test_launch_method_comprehensive( self, mock_logger, mock_join, @@ -753,200 +710,34 @@ def test_launch_method_long_name_truncation( mock_move_data, mock_validate_mounts, ): - """Test that long names are properly truncated and warning is logged.""" - # Setup mocks - mock_job = MagicMock() - mock_job.metadata.id_ = "test-job-id" - mock_create_job.return_value = mock_job - mock_status.return_value = LeptonJobState.Running - mock_join.return_value = "/fake/path/launch_script.sh" - + """Test launch method name validation, pre_launch_commands, and script generation.""" + # Setup executor = LeptonExecutor( - container_image="test-image", - nemo_run_dir="/test/path", - ) - executor.job_dir = "/fake/job/dir" - executor.lepton_job_dir = "/fake/lepton/job/dir" - - # Test long name (should be truncated and logged) - long_name = "this-is-a-very-long-name-that-exceeds-thirty-five-characters" - job_id, status = executor.launch(long_name, ["python", "script.py"]) - - # Verify warning was logged - mock_logger.warning.assert_called_with( - "length of name exceeds 35 characters. Shortening..." + container_image="test-image", nemo_run_dir="/test", pre_launch_commands=["echo setup"] ) - assert job_id == "test-job-id" - assert status == LeptonJobState.Running - - @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") - @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") - @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") - @patch("nemo_run.core.execution.lepton.LeptonExecutor.status") - @patch("builtins.open", new_callable=mock_open) - @patch("os.path.join") - def test_launch_method_with_pre_launch_commands( - self, - mock_join, - mock_file, - mock_status, - mock_create_job, - mock_move_data, - mock_validate_mounts, - ): - """Test the launch method properly includes pre_launch_commands in the script.""" - # Setup mocks + executor.job_dir = executor.lepton_job_dir = "/fake" + mock_join.return_value = "/fake/script.sh" mock_job = MagicMock() - mock_job.metadata.id_ = "test-job-id" - mock_create_job.return_value = mock_job - mock_status.return_value = LeptonJobState.Running - mock_join.return_value = "/fake/path/launch_script.sh" - - # Test with pre_launch_commands - commands = ["echo 'Setting up environment'", "export TEST_VAR=value", "mkdir -p /workspace"] - executor = LeptonExecutor( - container_image="test-image", - nemo_run_dir="/test/path", - pre_launch_commands=commands, - ) - executor.job_dir = "/fake/job/dir" - executor.lepton_job_dir = "/fake/lepton/job/dir" - - job_id, status = executor.launch("test-job", ["python", "train.py"]) - - # Verify launch script was written with pre_launch_commands - mock_file.assert_called_once_with("/fake/path/launch_script.sh", "w+") - handle = mock_file.return_value.__enter__.return_value - written_content = handle.write.call_args[0][0] - - # Verify pre_launch_commands are included at the beginning - assert "echo 'Setting up environment'" in written_content - assert "export TEST_VAR=value" in written_content - assert "mkdir -p /workspace" in written_content - - # Verify the script structure - assert "wget -O init.sh" in written_content - assert "chmod +x init.sh" in written_content - assert "source init.sh" in written_content - assert "ln -s /fake/lepton/job/dir/ /nemo_run" in written_content - assert "cd /nemo_run/code" in written_content - assert "python train.py" in written_content - - assert job_id == "test-job-id" - assert status == LeptonJobState.Running - - @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") - @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") - @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") - @patch("nemo_run.core.execution.lepton.LeptonExecutor.status") - @patch("builtins.open", new_callable=mock_open) - @patch("os.path.join") - def test_launch_method_without_pre_launch_commands( - self, - mock_join, - mock_file, - mock_status, - mock_create_job, - mock_move_data, - mock_validate_mounts, - ): - """Test the launch method works correctly without pre_launch_commands.""" - # Setup mocks - mock_job = MagicMock() - mock_job.metadata.id_ = "test-job-id" + mock_job.metadata.id_ = "job-id" mock_create_job.return_value = mock_job mock_status.return_value = LeptonJobState.Running - mock_join.return_value = "/fake/path/launch_script.sh" - # Test without pre_launch_commands (default empty list) - executor = LeptonExecutor( - container_image="test-image", - nemo_run_dir="/test/path", - ) - executor.job_dir = "/fake/job/dir" - executor.lepton_job_dir = "/fake/lepton/job/dir" - - job_id, status = executor.launch("test-job", ["python", "script.py"]) + # Test name transformation and pre_launch_commands + job_id, status = executor.launch("Test_Job.Name", ["python", "script.py"]) + assert job_id == "job-id" - # Verify launch script was written without pre_launch_commands - mock_file.assert_called_once_with("/fake/path/launch_script.sh", "w+") + # Verify script content includes pre_launch_commands handle = mock_file.return_value.__enter__.return_value written_content = handle.write.call_args[0][0] - - # Verify no pre_launch_commands section - lines = written_content.split("\n") - # First non-empty line should be the wget command - first_command_line = next(line for line in lines if line.strip()) - assert first_command_line.strip().startswith("wget -O init.sh") - - # Verify the standard script structure is still there - assert "wget -O init.sh" in written_content - assert "chmod +x init.sh" in written_content - assert "source init.sh" in written_content - assert "ln -s /fake/lepton/job/dir/ /nemo_run" in written_content - assert "cd /nemo_run/code" in written_content + assert "echo setup\n" in written_content assert "python script.py" in written_content - assert job_id == "test-job-id" - assert status == LeptonJobState.Running - - @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") - @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data") - @patch("nemo_run.core.execution.lepton.LeptonExecutor.create_lepton_job") - @patch("nemo_run.core.execution.lepton.LeptonExecutor.status") - @patch("builtins.open", new_callable=mock_open) - @patch("os.path.join") - def test_launch_method_edge_cases( - self, - mock_join, - mock_file, - mock_status, - mock_create_job, - mock_move_data, - mock_validate_mounts, - ): - """Test edge cases for the launch method.""" - # Setup mocks - mock_job = MagicMock() - mock_job.metadata.id_ = "test-job-id" - mock_create_job.return_value = mock_job - mock_status.return_value = LeptonJobState.Running - mock_join.return_value = "/fake/path/launch_script.sh" - - executor = LeptonExecutor( - container_image="test-image", - nemo_run_dir="/test/path", + # Test long name truncation + long_name = "a" * 50 + executor.launch(long_name, ["cmd"]) + mock_logger.warning.assert_called_with( + "length of name exceeds 35 characters. Shortening..." ) - executor.job_dir = "/fake/job/dir" - executor.lepton_job_dir = "/fake/lepton/job/dir" - - # Test with exactly 35 characters (should be truncated) - name_35_chars = "a" * 35 - job_id, status = executor.launch(name_35_chars, ["python", "script.py"]) - assert job_id == "test-job-id" - - # Test with empty command list - job_id, status = executor.launch("test-job", []) - assert job_id == "test-job-id" - - # Test with complex command - complex_cmd = [ - "python", - "-m", - "torch.distributed.launch", - "--nproc_per_node=8", - "train.py", - "--config", - "config.yaml", - ] - job_id, status = executor.launch("complex-job", complex_cmd) - assert job_id == "test-job-id" - - # Verify complex command is properly joined in script - handle = mock_file.return_value.__enter__.return_value - written_content = handle.write.call_args[0][0] - expected_cmd = " ".join(complex_cmd) - assert expected_cmd in written_content @patch("nemo_run.core.execution.lepton.LeptonExecutor._validate_mounts") @patch("nemo_run.core.execution.lepton.LeptonExecutor.move_data")