diff --git a/src/cloudai/_core/configurator/cloudai_gym.py b/src/cloudai/_core/configurator/cloudai_gym.py
index 68bb72270..7069dc2be 100644
--- a/src/cloudai/_core/configurator/cloudai_gym.py
+++ b/src/cloudai/_core/configurator/cloudai_gym.py
@@ -136,17 +136,17 @@ def step(self, action: Any) -> Tuple[list, float, bool, dict]:
             return [-1.0], -1.0, True, {}
 
         logging.info(f"Running step {self.test_run.step} with action {action}")
-        self.runner.runner.test_scenario.test_runs = [copy.deepcopy(self.test_run)]
+        new_tr = copy.deepcopy(self.test_run)
+        self.runner.runner.test_scenario.test_runs = [new_tr]
         asyncio.run(self.runner.run())
+        self.test_run = self.runner.runner.test_scenario.test_runs[0]
 
         observation = self.get_observation(action)
         reward = self.compute_reward(observation)
-        done = False
-        info = {}
 
         self.write_trajectory(self.test_run.step, action, reward, observation)
 
-        return observation, reward, done, info
+        return observation, reward, False, {}
 
     def render(self, mode: str = "human"):
         """
diff --git a/src/cloudai/cli/handlers.py b/src/cloudai/cli/handlers.py
index 0f858717e..f6aa49b2d 100644
--- a/src/cloudai/cli/handlers.py
+++ b/src/cloudai/cli/handlers.py
@@ -108,7 +108,7 @@ def handle_dse_job(runner: Runner, args: argparse.Namespace):
             if result is None:
                 break
             step, action = result
-            test_run.step = step
+            env.test_run.step = step
             observation, reward, done, info = env.step(action)
             feedback = {"trial_index": step, "value": reward}
             agent.update_policy(feedback)
diff --git a/src/cloudai/workloads/nemo_run/report_generation_strategy.py b/src/cloudai/workloads/nemo_run/report_generation_strategy.py
index f09caed6c..8cbd98cd2 100644
--- a/src/cloudai/workloads/nemo_run/report_generation_strategy.py
+++ b/src/cloudai/workloads/nemo_run/report_generation_strategy.py
@@ -99,6 +99,7 @@ def generate_report(self) -> None:
             f.write("Max: {max}\n".format(max=stats["max"]))
 
     def get_metric(self, metric: str) -> float:
+        logging.debug(f"Getting metric {metric} from {self.results_file.absolute()}")
         step_timings = extract_timings(self.results_file)
         if not step_timings:
             return METRIC_ERROR
diff --git a/tests/conftest.py b/tests/conftest.py
index 447a68c2a..429c412e9 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -55,7 +55,7 @@ def slurm_system(tmp_path: Path) -> SlurmSystem:
         ],
     )
     system.scheduler = "slurm"
-    system.monitor_interval = 10
+    system.monitor_interval = 0
     return system
 
 
diff --git a/tests/test_cloudaigym.py b/tests/test_cloudaigym.py
index ca41d7cf4..865981b58 100644
--- a/tests/test_cloudaigym.py
+++ b/tests/test_cloudaigym.py
@@ -19,6 +19,7 @@
 import pytest
 
 from cloudai._core.configurator.cloudai_gym import CloudAIGymEnv
+from cloudai._core.configurator.grid_search import GridSearchAgent
 from cloudai._core.runner import Runner
 from cloudai._core.test import Test
 from cloudai._core.test_scenario import TestRun, TestScenario
@@ -68,7 +69,7 @@ def setup_env(slurm_system: SlurmSystem) -> tuple[TestRun, Runner]:
         slurm_system.output_path / test_scenario.name / test_run.name / f"{test_run.current_iteration}"
     )
 
-    runner = Runner(mode="run", system=slurm_system, test_scenario=test_scenario)
+    runner = Runner(mode="dry-run", system=slurm_system, test_scenario=test_scenario)
 
     return test_run, runner
 
@@ -237,3 +238,16 @@ def test_update_test_run_obj():
 
     env.update_test_run_obj(cmd_args, "trainer.num_nodes", [3, 4])
     assert cmd_args.trainer.num_nodes == [3, 4]
+
+
+def test_tr_output_path(setup_env: tuple[TestRun, Runner]):
+    test_run, runner = setup_env
+    test_run.test.test_definition.cmd_args.data.global_batch_size = 8  # avoid constraint check failure
+    env = CloudAIGymEnv(test_run=test_run, runner=runner)
+    agent = GridSearchAgent(env)
+
+    _, action = agent.select_action()  # first element of the pair is discarded; only the action is used
+    env.test_run.step = 42  # arbitrary step number that the final assertion looks for
+    env.step(action)  # step() rebinds env.test_run to the run taken back from the runner's scenario
+
+    assert env.test_run.output_path.name == "42"  # final path component must match the step set above