From 7fea5e3aafc488139786312b6f82417ed3958214 Mon Sep 17 00:00:00 2001 From: Guyue Huang Date: Thu, 8 Jan 2026 08:34:02 -0800 Subject: [PATCH 1/9] Implement retry in nemo_gym rollout collection Signed-off-by: Guyue Huang --- examples/nemo_gym/run_grpo_nemo_gym.py | 1 + nemo_rl/environments/nemo_gym.py | 56 +++++++++++++++++--------- 2 files changed, 39 insertions(+), 18 deletions(-) diff --git a/examples/nemo_gym/run_grpo_nemo_gym.py b/examples/nemo_gym/run_grpo_nemo_gym.py index c8d2c911e2..2b67e20439 100644 --- a/examples/nemo_gym/run_grpo_nemo_gym.py +++ b/examples/nemo_gym/run_grpo_nemo_gym.py @@ -253,6 +253,7 @@ def main() -> None: model_name=policy_generation.cfg["model_name"], base_urls=policy_generation.dp_openai_server_base_urls, initial_global_config_dict=config["env"]["nemo_gym"], + rollout_max_retries_to_avoid_lp_nan=policy_generation.cfg.get("rollout_max_retries_to_avoid_lp_nan", 1), ) nemo_gym = NemoGym.options( runtime_env={ diff --git a/nemo_rl/environments/nemo_gym.py b/nemo_rl/environments/nemo_gym.py index da47ff5184..064109be8a 100644 --- a/nemo_rl/environments/nemo_gym.py +++ b/nemo_rl/environments/nemo_gym.py @@ -28,6 +28,7 @@ class NemoGymConfig(TypedDict): model_name: str base_urls: List[str] initial_global_config_dict: Dict[str, Any] + rollout_max_retries_to_avoid_lp_nan: int = 1 @ray.remote(max_restarts=-1, max_task_retries=-1) # pragma: no cover @@ -111,25 +112,44 @@ async def run_rollouts( ) -> list[dict]: timer = Timer() - nemo_gym_num_rows = len(nemo_gym_examples) - nemo_gym_result_iterator = self.rch.run_examples( - examples=nemo_gym_examples, head_server_config=self.head_server_config - ) - timer.start("_run_rollouts_total") - nemo_rl_rowidxs = [] - nemo_rl_results = [] - for task in nemo_gym_result_iterator: - with timer.time(label=f"{timer_prefix}/await_results"): - nemo_gym_row, nemo_gym_result = await task - - with timer.time(label=f"{timer_prefix}/postprocess_results"): - nemo_rl_result = self._postprocess_nemo_gym_to_nemo_rl_result( - nemo_gym_result, tokenizer - ) - - nemo_rl_rowidxs.append(nemo_gym_row["_rowidx"]) - nemo_rl_results.append(nemo_rl_result) + max_retries, trial = self.cfg["rollout_max_retries_to_avoid_lp_nan"], 0 + while trial < max_retries: + + nemo_gym_num_rows = len(nemo_gym_examples) + nemo_gym_result_iterator = self.rch.run_examples( + examples=nemo_gym_examples, head_server_config=self.head_server_config + ) + + nemo_rl_rowidxs = [] + nemo_rl_results = [] + for task in nemo_gym_result_iterator: + with timer.time(label=f"{timer_prefix}/await_results"): + nemo_gym_row, nemo_gym_result = await task + + with timer.time(label=f"{timer_prefix}/postprocess_results"): + nemo_rl_result = self._postprocess_nemo_gym_to_nemo_rl_result( + nemo_gym_result, tokenizer + ) + + nemo_rl_rowidxs.append(nemo_gym_row["_rowidx"]) + nemo_rl_results.append(nemo_rl_result) + + + # determine if generation_logprobs contain NaN; if not, break; + logprob_contains_nan = False + for nemo_rl_result in nemo_rl_results: + for message in nemo_rl_result["message_log"]: + if "generation_logprobs" in message and message["generation_logprobs"] is not None: + if torch.isnan(message["generation_logprobs"]).any(): + logprob_contains_nan = True + break + if logprob_contains_nan: + trial += 1 + print(f"Generation logprobs contain NaN; retrying... (trial {trial}/{max_retries})") + continue + else: + break nemo_rl_sort_results = [None] * nemo_gym_num_rows for rowidx, result in zip(nemo_rl_rowidxs, nemo_rl_results): From 7f98eea4ed92ecff9bc13c8b2efe1a4ec660342a Mon Sep 17 00:00:00 2001 From: Guyue Huang Date: Thu, 8 Jan 2026 08:34:30 -0800 Subject: [PATCH 2/9] Unit test (need to debug) Signed-off-by: Guyue Huang --- tests/unit/environments/test_nemo_gym.py | 108 +++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/tests/unit/environments/test_nemo_gym.py b/tests/unit/environments/test_nemo_gym.py index 9812e23d17..b3ec5bb582 100644 --- a/tests/unit/environments/test_nemo_gym.py +++ b/tests/unit/environments/test_nemo_gym.py @@ -43,6 +43,8 @@ nemo_gym = None NEMO_GYM_INSTALLED = False +NEMO_GYM_ROLLOUT_MAX_RETRIES_TO_AVOID_LP_NAN = 3 +run_examples_called = 0 @pytest.mark.skipif( not NEMO_GYM_INSTALLED, @@ -112,6 +114,7 @@ def nemo_gym(nemo_gym_vllm_generation): model_name=nemo_gym_vllm_generation.cfg["model_name"], base_urls=nemo_gym_vllm_generation.dp_openai_server_base_urls, initial_global_config_dict=safe_load(yaml_str), + rollout_max_retries_to_avoid_lp_nan=1, ) env = NemoGym.options( runtime_env={ @@ -199,3 +202,108 @@ def _standardize(l: list[dict]): return list(map(_standardize_single_result, l)) assert _standardize(expected_result) == _standardize(actual_result) + +@pytest.fixture(scope="function") +def nemo_gym_with_patched_run_examples(nemo_gym_vllm_generation): + from nemo_gym.rollout_collection import RolloutCollectionHelper + from typing import List, Dict, Iterator, Optional + from asyncio import Future + from nemo_gym.server_utils import BaseServerConfig + from contextlib import contextmanager + + @contextmanager + def patch_run_examples(): + # patch nemo gym rollout collection helper + # 1. patch the run_examples method to return messages with NaN in generation_logprobs + # 2. patch the run_examples method to count for how many times it is called + + orig_run_examples = RolloutCollectionHelper.run_examples + + def new_run_examples(self, examples: List[Dict], head_server_config: Optional[BaseServerConfig] = None) -> Iterator[Future]: + print("[guyueh debug] calling new_run_examples: ") + global run_examples_called + run_examples_called += 1 + + for task in orig_run_examples(self, examples, head_server_config): + row, result = task + # insert a NaN in the generation_log_probs + has_generation_log_probs = False + for i in range(len(result["response"]["output"])): + if "generation_log_probs" in result["response"]["output"][i]: + result["response"]["output"][i]["generation_log_probs"][-1] = float('nan') + has_generation_log_probs = True + break + + if not has_generation_log_probs: + raise ValueError("No generation_log_probs found in the result") + yield row, result + + RolloutCollectionHelper.run_examples = new_run_examples + try: + yield + finally: + RolloutCollectionHelper.run_examples = orig_run_examples + + context = patch_run_examples() + context.__enter__() + + yaml_str = r"""example_multi_step_resources_server: + resources_servers: + example_multi_step: + entrypoint: app.py + domain: instruction_following +example_multi_step_simple_agent: + responses_api_agents: + simple_agent: + entrypoint: app.py + resources_server: + type: resources_servers + name: example_multi_step_resources_server + model_server: + type: responses_api_models + name: openai_model +openai_model: + responses_api_models: + vllm_model: + entrypoint: app.py + base_url: ${policy_base_url} + api_key: ${policy_api_key} + model: ${policy_model_name} + return_token_id_information: true + uses_reasoning_parser: true +""" + + config = NemoGymConfig( + model_name=nemo_gym_vllm_generation.cfg["model_name"], + base_urls=nemo_gym_vllm_generation.dp_openai_server_base_urls, + initial_global_config_dict=safe_load(yaml_str), + rollout_max_retries_to_avoid_lp_nan=NEMO_GYM_ROLLOUT_MAX_RETRIES_TO_AVOID_LP_NAN, + ) + env = NemoGym.options( + runtime_env={ + "py_executable": get_actor_python_env( + "nemo_rl.environments.nemo_gym.NemoGym" + ), + } + ).remote(config) + ray.get(env.health_check.remote()) + yield env + env.shutdown.remote() + ray.kill(env) + time.sleep(0.1) + context.__exit__(None, None, None) + + +@pytest.mark.skipif( + not NEMO_GYM_INSTALLED, + reason="Skipping NeMo-Gym test since NeMo-Gym is not installed!", +) +def test_nemo_gym_rollout_max_retries_to_avoid_lp_nan( + nemo_gym_with_patched_run_examples, + nemo_gym_sanity_test_data, + nemo_gym_vllm_generation, + nemo_gym_tokenizer, # noqa: F811 +): + """Test the rollout max retries to avoid LP NaN.""" + test_nemo_gym_sanity(nemo_gym_with_patched_run_examples, nemo_gym_sanity_test_data, nemo_gym_vllm_generation, nemo_gym_tokenizer) + assert run_examples_called == NEMO_GYM_ROLLOUT_MAX_RETRIES_TO_AVOID_LP_NAN \ No newline at end of file From 351992b84f308b2301c84ab00901e2296cf04c56 Mon Sep 17 00:00:00 2001 From: Guyue Huang Date: Mon, 9 Feb 2026 21:35:32 +0000 Subject: [PATCH 3/9] Remove unit test that doesn't work Signed-off-by: Guyue Huang --- tests/unit/environments/test_nemo_gym.py | 107 +---------------------- 1 file changed, 1 insertion(+), 106 deletions(-) diff --git a/tests/unit/environments/test_nemo_gym.py b/tests/unit/environments/test_nemo_gym.py index b3ec5bb582..7c3527f2bf 100644 --- a/tests/unit/environments/test_nemo_gym.py +++ b/tests/unit/environments/test_nemo_gym.py @@ -201,109 +201,4 @@ def _standardize_single_result(d: dict): def _standardize(l: list[dict]): return list(map(_standardize_single_result, l)) - assert _standardize(expected_result) == _standardize(actual_result) - -@pytest.fixture(scope="function") -def nemo_gym_with_patched_run_examples(nemo_gym_vllm_generation): - from nemo_gym.rollout_collection import RolloutCollectionHelper - from typing import List, Dict, Iterator, Optional - from asyncio import Future - from nemo_gym.server_utils import BaseServerConfig - from contextlib import contextmanager - - @contextmanager - def patch_run_examples(): - # patch nemo gym rollout collection helper - # 1. patch the run_examples method to return messages with NaN in generation_logprobs - # 2. patch the run_examples method to count for how many times it is called - - orig_run_examples = RolloutCollectionHelper.run_examples - - def new_run_examples(self, examples: List[Dict], head_server_config: Optional[BaseServerConfig] = None) -> Iterator[Future]: - print("[guyueh debug] calling new_run_examples: ") - global run_examples_called - run_examples_called += 1 - - for task in orig_run_examples(self, examples, head_server_config): - row, result = task - # insert a NaN in the generation_log_probs - has_generation_log_probs = False - for i in range(len(result["response"]["output"])): - if "generation_log_probs" in result["response"]["output"][i]: - result["response"]["output"][i]["generation_log_probs"][-1] = float('nan') - has_generation_log_probs = True - break - - if not has_generation_log_probs: - raise ValueError("No generation_log_probs found in the result") - yield row, result - - RolloutCollectionHelper.run_examples = new_run_examples - try: - yield - finally: - RolloutCollectionHelper.run_examples = orig_run_examples - - context = patch_run_examples() - context.__enter__() - - yaml_str = r"""example_multi_step_resources_server: - resources_servers: - example_multi_step: - entrypoint: app.py - domain: instruction_following -example_multi_step_simple_agent: - responses_api_agents: - simple_agent: - entrypoint: app.py - resources_server: - type: resources_servers - name: example_multi_step_resources_server - model_server: - type: responses_api_models - name: openai_model -openai_model: - responses_api_models: - vllm_model: - entrypoint: app.py - base_url: ${policy_base_url} - api_key: ${policy_api_key} - model: ${policy_model_name} - return_token_id_information: true - uses_reasoning_parser: true -""" - - config = NemoGymConfig( - model_name=nemo_gym_vllm_generation.cfg["model_name"], - base_urls=nemo_gym_vllm_generation.dp_openai_server_base_urls, - initial_global_config_dict=safe_load(yaml_str), - rollout_max_retries_to_avoid_lp_nan=NEMO_GYM_ROLLOUT_MAX_RETRIES_TO_AVOID_LP_NAN, - ) - env = NemoGym.options( - runtime_env={ - "py_executable": get_actor_python_env( - "nemo_rl.environments.nemo_gym.NemoGym" - ), - } - ).remote(config) - ray.get(env.health_check.remote()) - yield env - env.shutdown.remote() - ray.kill(env) - time.sleep(0.1) - context.__exit__(None, None, None) - - -@pytest.mark.skipif( - not NEMO_GYM_INSTALLED, - reason="Skipping NeMo-Gym test since NeMo-Gym is not installed!", -) -def test_nemo_gym_rollout_max_retries_to_avoid_lp_nan( - nemo_gym_with_patched_run_examples, - nemo_gym_sanity_test_data, - nemo_gym_vllm_generation, - nemo_gym_tokenizer, # noqa: F811 -): - """Test the rollout max retries to avoid LP NaN.""" - test_nemo_gym_sanity(nemo_gym_with_patched_run_examples, nemo_gym_sanity_test_data, nemo_gym_vllm_generation, nemo_gym_tokenizer) - assert run_examples_called == NEMO_GYM_ROLLOUT_MAX_RETRIES_TO_AVOID_LP_NAN \ No newline at end of file + assert _standardize(expected_result) == _standardize(actual_result) \ No newline at end of file From 50a33f6500dd28b9db3b87a72411d819004039f0 Mon Sep 17 00:00:00 2001 From: Guyue Huang Date: Mon, 9 Feb 2026 22:22:54 +0000 Subject: [PATCH 4/9] Fix lint Signed-off-by: Guyue Huang --- tests/unit/environments/test_nemo_gym.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/environments/test_nemo_gym.py b/tests/unit/environments/test_nemo_gym.py index 7c3527f2bf..fa402a082d 100644 --- a/tests/unit/environments/test_nemo_gym.py +++ b/tests/unit/environments/test_nemo_gym.py @@ -46,6 +46,7 @@ NEMO_GYM_ROLLOUT_MAX_RETRIES_TO_AVOID_LP_NAN = 3 run_examples_called = 0 + @pytest.mark.skipif( not NEMO_GYM_INSTALLED, reason="Skipping NeMo-Gym test since NeMo-Gym is not installed!", @@ -201,4 +202,4 @@ def _standardize_single_result(d: dict): def _standardize(l: list[dict]): return list(map(_standardize_single_result, l)) - assert _standardize(expected_result) == _standardize(actual_result) \ No newline at end of file + assert _standardize(expected_result) == _standardize(actual_result) From a3cd10777e81c82e2def01bc756263d908f50826 Mon Sep 17 00:00:00 2001 From: Guyue Huang Date: Mon, 9 Feb 2026 22:32:43 +0000 Subject: [PATCH 5/9] Fix lint Signed-off-by: Guyue Huang --- examples/nemo_gym/run_grpo_nemo_gym.py | 4 +++- nemo_rl/environments/nemo_gym.py | 15 +++++++++------ 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/examples/nemo_gym/run_grpo_nemo_gym.py b/examples/nemo_gym/run_grpo_nemo_gym.py index 2b67e20439..fdda50d80e 100644 --- a/examples/nemo_gym/run_grpo_nemo_gym.py +++ b/examples/nemo_gym/run_grpo_nemo_gym.py @@ -253,7 +253,9 @@ def main() -> None: model_name=policy_generation.cfg["model_name"], base_urls=policy_generation.dp_openai_server_base_urls, initial_global_config_dict=config["env"]["nemo_gym"], - rollout_max_retries_to_avoid_lp_nan=policy_generation.cfg.get("rollout_max_retries_to_avoid_lp_nan", 1), + rollout_max_retries_to_avoid_lp_nan=policy_generation.cfg.get( + "rollout_max_retries_to_avoid_lp_nan", 1 + ), ) nemo_gym = NemoGym.options( runtime_env={ diff --git a/nemo_rl/environments/nemo_gym.py b/nemo_rl/environments/nemo_gym.py index 5349a2aaed..d0bcd4fea2 100644 --- a/nemo_rl/environments/nemo_gym.py +++ b/nemo_rl/environments/nemo_gym.py @@ -115,7 +115,6 @@ async def run_rollouts( timer.start("_run_rollouts_total") max_retries, trial = self.cfg["rollout_max_retries_to_avoid_lp_nan"], 0 while trial < max_retries: - nemo_gym_num_rows = len(nemo_gym_examples) nemo_gym_result_iterator = self.rch.run_examples( examples=nemo_gym_examples, head_server_config=self.head_server_config @@ -134,22 +133,26 @@ async def run_rollouts( nemo_rl_rowidxs.append(nemo_gym_row["_rowidx"]) nemo_rl_results.append(nemo_rl_result) - - # determine if generation_logprobs contain NaN; if not, break; + # determine if generation_logprobs contain NaN; if not, break; logprob_contains_nan = False for nemo_rl_result in nemo_rl_results: for message in nemo_rl_result["message_log"]: - if "generation_logprobs" in message and message["generation_logprobs"] is not None: + if ( + "generation_logprobs" in message + and message["generation_logprobs"] is not None + ): if torch.isnan(message["generation_logprobs"]).any(): logprob_contains_nan = True break if logprob_contains_nan: trial += 1 - print(f"Generation logprobs contain NaN; retrying... (trial {trial}/{max_retries})") + print( + f"Generation logprobs contain NaN; retrying... (trial {trial}/{max_retries})" + ) continue else: - break + break nemo_rl_sort_results = [None] * nemo_gym_num_rows for rowidx, result in zip(nemo_rl_rowidxs, nemo_rl_results): From b71a7badd2291737be6666958b788ce36cece921 Mon Sep 17 00:00:00 2001 From: Guyue Huang <140554423+guyueh1@users.noreply.github.com> Date: Wed, 11 Feb 2026 14:34:46 -0800 Subject: [PATCH 6/9] Update tests/unit/environments/test_nemo_gym.py Co-authored-by: Terry Kong Signed-off-by: Guyue Huang <140554423+guyueh1@users.noreply.github.com> --- tests/unit/environments/test_nemo_gym.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/unit/environments/test_nemo_gym.py b/tests/unit/environments/test_nemo_gym.py index fa402a082d..e1e8b2a133 100644 --- a/tests/unit/environments/test_nemo_gym.py +++ b/tests/unit/environments/test_nemo_gym.py @@ -43,9 +43,6 @@ nemo_gym = None NEMO_GYM_INSTALLED = False -NEMO_GYM_ROLLOUT_MAX_RETRIES_TO_AVOID_LP_NAN = 3 -run_examples_called = 0 - @pytest.mark.skipif( not NEMO_GYM_INSTALLED, From d2d254a2dc835e8c10bfa8582aaead6846f4444c Mon Sep 17 00:00:00 2001 From: Guyue Huang <140554423+guyueh1@users.noreply.github.com> Date: Thu, 12 Feb 2026 09:49:57 -0800 Subject: [PATCH 7/9] Update nemo_rl/environments/nemo_gym.py Co-authored-by: Terry Kong Signed-off-by: Guyue Huang <140554423+guyueh1@users.noreply.github.com> --- nemo_rl/environments/nemo_gym.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo_rl/environments/nemo_gym.py b/nemo_rl/environments/nemo_gym.py index d0bcd4fea2..e557ce7078 100644 --- a/nemo_rl/environments/nemo_gym.py +++ b/nemo_rl/environments/nemo_gym.py @@ -113,8 +113,8 @@ async def run_rollouts( timer = Timer() timer.start("_run_rollouts_total") - max_retries, trial = self.cfg["rollout_max_retries_to_avoid_lp_nan"], 0 - while trial < max_retries: + max_attempts, trial = self.cfg["rollout_max_attempts_to_avoid_lp_nan"], 0 + while trial < max_attempts: nemo_gym_num_rows = len(nemo_gym_examples) nemo_gym_result_iterator = self.rch.run_examples( examples=nemo_gym_examples, head_server_config=self.head_server_config From c18a6f7b0b5a436d4d656f3156b213d874d4ce8f Mon Sep 17 00:00:00 2001 From: Guyue Huang Date: Thu, 12 Feb 2026 10:00:17 -0800 Subject: [PATCH 8/9] save Signed-off-by: Guyue Huang --- ...rpo_workplace_assistant_nemotron_nano_v2_9b.yaml | 1 + examples/nemo_gym/run_grpo_nemo_gym.py | 3 --- nemo_rl/environments/nemo_gym.py | 13 ++++++++++--- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml b/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml index f3e3dcccc8..aa451bac7b 100644 --- a/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml +++ b/examples/nemo_gym/grpo_workplace_assistant_nemotron_nano_v2_9b.yaml @@ -240,6 +240,7 @@ env: should_use_nemo_gym: true should_log_nemo_gym_responses: true # If you have low logging storage, set this to false nemo_gym: # This is passed into NeMo-Gym as the initial_global_config_dict + rollout_max_attempts_to_avoid_lp_nan: 1 is_trajectory_collection: false # Set this to true to enable trajectory collection (no training). You may also want to increase `policy.generation.vllm_cfg.gpu_memory_utilization` config_paths: - responses_api_models/vllm_model/configs/vllm_model_for_training.yaml # Required! And it must be *for_training diff --git a/examples/nemo_gym/run_grpo_nemo_gym.py b/examples/nemo_gym/run_grpo_nemo_gym.py index fdda50d80e..c8d2c911e2 100644 --- a/examples/nemo_gym/run_grpo_nemo_gym.py +++ b/examples/nemo_gym/run_grpo_nemo_gym.py @@ -253,9 +253,6 @@ def main() -> None: model_name=policy_generation.cfg["model_name"], base_urls=policy_generation.dp_openai_server_base_urls, initial_global_config_dict=config["env"]["nemo_gym"], - rollout_max_retries_to_avoid_lp_nan=policy_generation.cfg.get( - "rollout_max_retries_to_avoid_lp_nan", 1 - ), ) nemo_gym = NemoGym.options( runtime_env={ diff --git a/nemo_rl/environments/nemo_gym.py b/nemo_rl/environments/nemo_gym.py index e557ce7078..ba899cd31f 100644 --- a/nemo_rl/environments/nemo_gym.py +++ b/nemo_rl/environments/nemo_gym.py @@ -28,7 +28,6 @@ class NemoGymConfig(TypedDict): model_name: str base_urls: List[str] initial_global_config_dict: Dict[str, Any] - rollout_max_retries_to_avoid_lp_nan: int = 1 @ray.remote(max_restarts=-1, max_task_retries=-1) # pragma: no cover @@ -84,6 +83,14 @@ def __init__(self, cfg: NemoGymConfig): "port": self.head_server_port, } + self.rollout_max_attempts_to_avoid_lp_nan = initial_global_config_dict.pop( + "rollout_max_attempts_to_avoid_lp_nan", 1 + ) + + assert self.rollout_max_attempts_to_avoid_lp_nan >= 1, ( + "`rollout_max_attempts_to_avoid_lp_nan` must be at least 1" + ) + self.rh = RunHelper() self.rh.start( global_config_dict_parser_config=GlobalConfigDictParserConfig( @@ -113,7 +120,7 @@ async def run_rollouts( timer = Timer() timer.start("_run_rollouts_total") - max_attempts, trial = self.cfg["rollout_max_attempts_to_avoid_lp_nan"], 0 + max_attempts, trial = self.rollout_max_attempts_to_avoid_lp_nan, 0 while trial < max_attempts: nemo_gym_num_rows = len(nemo_gym_examples) nemo_gym_result_iterator = self.rch.run_examples( @@ -148,7 +155,7 @@ async def run_rollouts( if logprob_contains_nan: trial += 1 print( - f"Generation logprobs contain NaN; retrying... (trial {trial}/{max_retries})" + f"Generation logprobs contain NaN; retrying... (trial {trial}/{max_attempts})" ) continue else: From 7178ae3dea5d0e5620f1a3184d58c594201160a2 Mon Sep 17 00:00:00 2001 From: Guyue Huang <140554423+guyueh1@users.noreply.github.com> Date: Thu, 12 Feb 2026 10:06:29 -0800 Subject: [PATCH 9/9] Update test_nemo_gym.py Signed-off-by: Guyue Huang <140554423+guyueh1@users.noreply.github.com> --- tests/unit/environments/test_nemo_gym.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/environments/test_nemo_gym.py b/tests/unit/environments/test_nemo_gym.py index e1e8b2a133..05ce0936ed 100644 --- a/tests/unit/environments/test_nemo_gym.py +++ b/tests/unit/environments/test_nemo_gym.py @@ -106,13 +106,13 @@ def nemo_gym(nemo_gym_vllm_generation): model: ${policy_model_name} return_token_id_information: true uses_reasoning_parser: true +rollout_max_attempts_to_avoid_lp_nan: 1 """ config = NemoGymConfig( model_name=nemo_gym_vllm_generation.cfg["model_name"], base_urls=nemo_gym_vllm_generation.dp_openai_server_base_urls, initial_global_config_dict=safe_load(yaml_str), - rollout_max_retries_to_avoid_lp_nan=1, ) env = NemoGym.options( runtime_env={